net/virtio: fix incorrect cast of void *
[dpdk.git] / net / ixgbe / ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit Mask to indicate what bits required for building TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |     \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
76
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 /*********************************************************************
91  *
92  *  TX functions
93  *
94  **********************************************************************/
95
96 /*
97  * Check for descriptors with their DD bit set and free mbufs.
98  * Return the total number of buffers freed.
99  */
100 static __rte_always_inline int
101 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
102 {
103         struct ixgbe_tx_entry *txep;
104         uint32_t status;
105         int i, nb_free = 0;
106         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
107
108         /* check DD bit on threshold descriptor */
109         status = txq->tx_ring[txq->tx_next_dd].wb.status;
110         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
111                 return 0;
112
113         /*
114          * first buffer to free from S/W ring is at index
115          * tx_next_dd - (tx_rs_thresh-1)
116          */
117         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
118
119         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
120                 /* free buffers one at a time */
121                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
122                 txep->mbuf = NULL;
123
124                 if (unlikely(m == NULL))
125                         continue;
126
127                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
128                     (nb_free > 0 && m->pool != free[0]->pool)) {
129                         rte_mempool_put_bulk(free[0]->pool,
130                                              (void **)free, nb_free);
131                         nb_free = 0;
132                 }
133
134                 free[nb_free++] = m;
135         }
136
137         if (nb_free > 0)
138                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
139
140         /* buffers were freed, update counters */
141         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
142         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
143         if (txq->tx_next_dd >= txq->nb_tx_desc)
144                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
145
146         return txq->tx_rs_thresh;
147 }
148
149 /* Populate 4 descriptors with data from 4 mbufs */
150 static inline void
151 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
152 {
153         uint64_t buf_dma_addr;
154         uint32_t pkt_len;
155         int i;
156
157         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
158                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
159                 pkt_len = (*pkts)->data_len;
160
161                 /* write data to descriptor */
162                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
163
164                 txdp->read.cmd_type_len =
165                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
166
167                 txdp->read.olinfo_status =
168                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
169
170                 rte_prefetch0(&(*pkts)->pool);
171         }
172 }
173
174 /* Populate 1 descriptor with data from 1 mbuf */
175 static inline void
176 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
177 {
178         uint64_t buf_dma_addr;
179         uint32_t pkt_len;
180
181         buf_dma_addr = rte_mbuf_data_iova(*pkts);
182         pkt_len = (*pkts)->data_len;
183
184         /* write data to descriptor */
185         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
186         txdp->read.cmd_type_len =
187                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
188         txdp->read.olinfo_status =
189                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
190         rte_prefetch0(&(*pkts)->pool);
191 }
192
193 /*
194  * Fill H/W descriptor ring with mbuf data.
195  * Copy mbuf pointers to the S/W ring.
196  */
197 static inline void
198 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
199                       uint16_t nb_pkts)
200 {
201         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
202         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
203         const int N_PER_LOOP = 4;
204         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
205         int mainpart, leftover;
206         int i, j;
207
208         /*
209          * Process most of the packets in chunks of N pkts.  Any
210          * leftover packets will get processed one at a time.
211          */
212         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
213         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
214         for (i = 0; i < mainpart; i += N_PER_LOOP) {
215                 /* Copy N mbuf pointers to the S/W ring */
216                 for (j = 0; j < N_PER_LOOP; ++j) {
217                         (txep + i + j)->mbuf = *(pkts + i + j);
218                 }
219                 tx4(txdp + i, pkts + i);
220         }
221
222         if (unlikely(leftover > 0)) {
223                 for (i = 0; i < leftover; ++i) {
224                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
225                         tx1(txdp + mainpart + i, pkts + mainpart + i);
226                 }
227         }
228 }
229
230 static inline uint16_t
231 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
232              uint16_t nb_pkts)
233 {
234         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
235         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
236         uint16_t n = 0;
237
238         /*
239          * Begin scanning the H/W ring for done descriptors when the
240          * number of available descriptors drops below tx_free_thresh.  For
241          * each done descriptor, free the associated buffer.
242          */
243         if (txq->nb_tx_free < txq->tx_free_thresh)
244                 ixgbe_tx_free_bufs(txq);
245
246         /* Only use descriptors that are available */
247         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
248         if (unlikely(nb_pkts == 0))
249                 return 0;
250
251         /* Use exactly nb_pkts descriptors */
252         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
253
254         /*
255          * At this point, we know there are enough descriptors in the
256          * ring to transmit all the packets.  This assumes that each
257          * mbuf contains a single segment, and that no new offloads
258          * are expected, which would require a new context descriptor.
259          */
260
261         /*
262          * See if we're going to wrap-around. If so, handle the top
263          * of the descriptor ring first, then do the bottom.  If not,
264          * the processing looks just like the "bottom" part anyway...
265          */
266         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
267                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
268                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
269
270                 /*
271                  * We know that the last descriptor in the ring will need to
272                  * have its RS bit set because tx_rs_thresh has to be
273                  * a divisor of the ring size
274                  */
275                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
276                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
277                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
278
279                 txq->tx_tail = 0;
280         }
281
282         /* Fill H/W descriptor ring with mbuf data */
283         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
284         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
285
286         /*
287          * Determine if RS bit should be set
288          * This is what we actually want:
289          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
290          * but instead of subtracting 1 and doing >=, we can just do
291          * greater than without subtracting.
292          */
293         if (txq->tx_tail > txq->tx_next_rs) {
294                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
295                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
296                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
297                                                 txq->tx_rs_thresh);
298                 if (txq->tx_next_rs >= txq->nb_tx_desc)
299                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
300         }
301
302         /*
303          * Check for wrap-around. This would only happen if we used
304          * up to the last descriptor in the ring, no more, no less.
305          */
306         if (txq->tx_tail >= txq->nb_tx_desc)
307                 txq->tx_tail = 0;
308
309         /* update tail pointer */
310         rte_wmb();
311         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
312
313         return nb_pkts;
314 }
315
316 uint16_t
317 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
318                        uint16_t nb_pkts)
319 {
320         uint16_t nb_tx;
321
322         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
323         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
324                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
325
326         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
327         nb_tx = 0;
328         while (nb_pkts) {
329                 uint16_t ret, n;
330
331                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
332                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
333                 nb_tx = (uint16_t)(nb_tx + ret);
334                 nb_pkts = (uint16_t)(nb_pkts - ret);
335                 if (ret < n)
336                         break;
337         }
338
339         return nb_tx;
340 }
341
342 static uint16_t
343 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
344                     uint16_t nb_pkts)
345 {
346         uint16_t nb_tx = 0;
347         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
348
349         while (nb_pkts) {
350                 uint16_t ret, num;
351
352                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
353                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
354                                                  num);
355                 nb_tx += ret;
356                 nb_pkts -= ret;
357                 if (ret < num)
358                         break;
359         }
360
361         return nb_tx;
362 }
363
364 static inline void
365 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
366                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
367                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
368                 __rte_unused uint64_t *mdata)
369 {
370         uint32_t type_tucmd_mlhl;
371         uint32_t mss_l4len_idx = 0;
372         uint32_t ctx_idx;
373         uint32_t vlan_macip_lens;
374         union ixgbe_tx_offload tx_offload_mask;
375         uint32_t seqnum_seed = 0;
376
377         ctx_idx = txq->ctx_curr;
378         tx_offload_mask.data[0] = 0;
379         tx_offload_mask.data[1] = 0;
380         type_tucmd_mlhl = 0;
381
382         /* Specify which HW CTX to upload. */
383         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
384
385         if (ol_flags & PKT_TX_VLAN_PKT) {
386                 tx_offload_mask.vlan_tci |= ~0;
387         }
388
389         /* check if TCP segmentation required for this packet */
390         if (ol_flags & PKT_TX_TCP_SEG) {
391                 /* implies IP cksum in IPv4 */
392                 if (ol_flags & PKT_TX_IP_CKSUM)
393                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
394                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
395                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
396                 else
397                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
398                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
399                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
400
401                 tx_offload_mask.l2_len |= ~0;
402                 tx_offload_mask.l3_len |= ~0;
403                 tx_offload_mask.l4_len |= ~0;
404                 tx_offload_mask.tso_segsz |= ~0;
405                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
406                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
407         } else { /* no TSO, check if hardware checksum is needed */
408                 if (ol_flags & PKT_TX_IP_CKSUM) {
409                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                 }
413
414                 switch (ol_flags & PKT_TX_L4_MASK) {
415                 case PKT_TX_UDP_CKSUM:
416                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
417                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
419                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
427                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 case PKT_TX_SCTP_CKSUM:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
435                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
436                         tx_offload_mask.l2_len |= ~0;
437                         tx_offload_mask.l3_len |= ~0;
438                         break;
439                 default:
440                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
441                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
442                         break;
443                 }
444         }
445
446         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
447                 tx_offload_mask.outer_l2_len |= ~0;
448                 tx_offload_mask.outer_l3_len |= ~0;
449                 tx_offload_mask.l2_len |= ~0;
450                 seqnum_seed |= tx_offload.outer_l3_len
451                                << IXGBE_ADVTXD_OUTER_IPLEN;
452                 seqnum_seed |= tx_offload.l2_len
453                                << IXGBE_ADVTXD_TUNNEL_LEN;
454         }
455 #ifdef RTE_LIBRTE_SECURITY
456         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
457                 union ixgbe_crypto_tx_desc_md *md =
458                                 (union ixgbe_crypto_tx_desc_md *)mdata;
459                 seqnum_seed |=
460                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
461                 type_tucmd_mlhl |= md->enc ?
462                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
463                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
464                 type_tucmd_mlhl |=
465                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
466                 tx_offload_mask.sa_idx |= ~0;
467                 tx_offload_mask.sec_pad_len |= ~0;
468         }
469 #endif
470
471         txq->ctx_cache[ctx_idx].flags = ol_flags;
472         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
473                 tx_offload_mask.data[0] & tx_offload.data[0];
474         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
475                 tx_offload_mask.data[1] & tx_offload.data[1];
476         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
477
478         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
479         vlan_macip_lens = tx_offload.l3_len;
480         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
481                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
482                                     IXGBE_ADVTXD_MACLEN_SHIFT);
483         else
484                 vlan_macip_lens |= (tx_offload.l2_len <<
485                                     IXGBE_ADVTXD_MACLEN_SHIFT);
486         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
487         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
488         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
489         ctx_txd->seqnum_seed     = seqnum_seed;
490 }
491
492 /*
493  * Check which hardware context can be used. Use the existing match
494  * or create a new context descriptor.
495  */
496 static inline uint32_t
497 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
498                    union ixgbe_tx_offload tx_offload)
499 {
500         /* If match with the current used context */
501         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
502                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
503                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
504                      & tx_offload.data[0])) &&
505                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
506                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
507                      & tx_offload.data[1]))))
508                 return txq->ctx_curr;
509
510         /* What if match with the next context  */
511         txq->ctx_curr ^= 1;
512         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
513                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
514                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
515                      & tx_offload.data[0])) &&
516                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
517                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
518                      & tx_offload.data[1]))))
519                 return txq->ctx_curr;
520
521         /* Mismatch, use the previous context */
522         return IXGBE_CTX_NUM;
523 }
524
525 static inline uint32_t
526 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
527 {
528         uint32_t tmp = 0;
529
530         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
531                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
532         if (ol_flags & PKT_TX_IP_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
534         if (ol_flags & PKT_TX_TCP_SEG)
535                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
536         return tmp;
537 }
538
539 static inline uint32_t
540 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
541 {
542         uint32_t cmdtype = 0;
543
544         if (ol_flags & PKT_TX_VLAN_PKT)
545                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
548         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
549                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
550         if (ol_flags & PKT_TX_MACSEC)
551                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
552         return cmdtype;
553 }
554
555 /* Default RS bit threshold values */
556 #ifndef DEFAULT_TX_RS_THRESH
557 #define DEFAULT_TX_RS_THRESH   32
558 #endif
559 #ifndef DEFAULT_TX_FREE_THRESH
560 #define DEFAULT_TX_FREE_THRESH 32
561 #endif
562
563 /* Reset transmit descriptors after they have been used */
564 static inline int
565 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
566 {
567         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
568         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
569         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
570         uint16_t nb_tx_desc = txq->nb_tx_desc;
571         uint16_t desc_to_clean_to;
572         uint16_t nb_tx_to_clean;
573         uint32_t status;
574
575         /* Determine the last descriptor needing to be cleaned */
576         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
577         if (desc_to_clean_to >= nb_tx_desc)
578                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
579
580         /* Check to make sure the last descriptor to clean is done */
581         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
582         status = txr[desc_to_clean_to].wb.status;
583         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
584                 PMD_TX_FREE_LOG(DEBUG,
585                                 "TX descriptor %4u is not done"
586                                 "(port=%d queue=%d)",
587                                 desc_to_clean_to,
588                                 txq->port_id, txq->queue_id);
589                 /* Failed to clean any descriptors, better luck next time */
590                 return -(1);
591         }
592
593         /* Figure out how many descriptors will be cleaned */
594         if (last_desc_cleaned > desc_to_clean_to)
595                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
596                                                         desc_to_clean_to);
597         else
598                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
599                                                 last_desc_cleaned);
600
601         PMD_TX_FREE_LOG(DEBUG,
602                         "Cleaning %4u TX descriptors: %4u to %4u "
603                         "(port=%d queue=%d)",
604                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
605                         txq->port_id, txq->queue_id);
606
607         /*
608          * The last descriptor to clean is done, so that means all the
609          * descriptors from the last descriptor that was cleaned
610          * up to the last descriptor with the RS bit set
611          * are done. Only reset the threshold descriptor.
612          */
613         txr[desc_to_clean_to].wb.status = 0;
614
615         /* Update the txq to reflect the last descriptor that was cleaned */
616         txq->last_desc_cleaned = desc_to_clean_to;
617         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
618
619         /* No Error */
620         return 0;
621 }
622
623 uint16_t
624 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
625                 uint16_t nb_pkts)
626 {
627         struct ixgbe_tx_queue *txq;
628         struct ixgbe_tx_entry *sw_ring;
629         struct ixgbe_tx_entry *txe, *txn;
630         volatile union ixgbe_adv_tx_desc *txr;
631         volatile union ixgbe_adv_tx_desc *txd, *txp;
632         struct rte_mbuf     *tx_pkt;
633         struct rte_mbuf     *m_seg;
634         uint64_t buf_dma_addr;
635         uint32_t olinfo_status;
636         uint32_t cmd_type_len;
637         uint32_t pkt_len;
638         uint16_t slen;
639         uint64_t ol_flags;
640         uint16_t tx_id;
641         uint16_t tx_last;
642         uint16_t nb_tx;
643         uint16_t nb_used;
644         uint64_t tx_ol_req;
645         uint32_t ctx = 0;
646         uint32_t new_ctx;
647         union ixgbe_tx_offload tx_offload;
648 #ifdef RTE_LIBRTE_SECURITY
649         uint8_t use_ipsec;
650 #endif
651
652         tx_offload.data[0] = 0;
653         tx_offload.data[1] = 0;
654         txq = tx_queue;
655         sw_ring = txq->sw_ring;
656         txr     = txq->tx_ring;
657         tx_id   = txq->tx_tail;
658         txe = &sw_ring[tx_id];
659         txp = NULL;
660
661         /* Determine if the descriptor ring needs to be cleaned. */
662         if (txq->nb_tx_free < txq->tx_free_thresh)
663                 ixgbe_xmit_cleanup(txq);
664
665         rte_prefetch0(&txe->mbuf->pool);
666
667         /* TX loop */
668         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
669                 new_ctx = 0;
670                 tx_pkt = *tx_pkts++;
671                 pkt_len = tx_pkt->pkt_len;
672
673                 /*
674                  * Determine how many (if any) context descriptors
675                  * are needed for offload functionality.
676                  */
677                 ol_flags = tx_pkt->ol_flags;
678 #ifdef RTE_LIBRTE_SECURITY
679                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
680 #endif
681
682                 /* If hardware offload required */
683                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
684                 if (tx_ol_req) {
685                         tx_offload.l2_len = tx_pkt->l2_len;
686                         tx_offload.l3_len = tx_pkt->l3_len;
687                         tx_offload.l4_len = tx_pkt->l4_len;
688                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
689                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
690                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
691                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
692 #ifdef RTE_LIBRTE_SECURITY
693                         if (use_ipsec) {
694                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
695                                         (union ixgbe_crypto_tx_desc_md *)
696                                                         &tx_pkt->udata64;
697                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
698                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
699                         }
700 #endif
701
702                         /* If new context need be built or reuse the exist ctx. */
703                         ctx = what_advctx_update(txq, tx_ol_req,
704                                 tx_offload);
705                         /* Only allocate context descriptor if required*/
706                         new_ctx = (ctx == IXGBE_CTX_NUM);
707                         ctx = txq->ctx_curr;
708                 }
709
710                 /*
711                  * Keep track of how many descriptors are used this loop
712                  * This will always be the number of segments + the number of
713                  * Context descriptors required to transmit the packet
714                  */
715                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
716
717                 if (txp != NULL &&
718                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
719                         /* set RS on the previous packet in the burst */
720                         txp->read.cmd_type_len |=
721                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
722
723                 /*
724                  * The number of descriptors that must be allocated for a
725                  * packet is the number of segments of that packet, plus 1
726                  * Context Descriptor for the hardware offload, if any.
727                  * Determine the last TX descriptor to allocate in the TX ring
728                  * for the packet, starting from the current position (tx_id)
729                  * in the ring.
730                  */
731                 tx_last = (uint16_t) (tx_id + nb_used - 1);
732
733                 /* Circular ring */
734                 if (tx_last >= txq->nb_tx_desc)
735                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
736
737                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
738                            " tx_first=%u tx_last=%u",
739                            (unsigned) txq->port_id,
740                            (unsigned) txq->queue_id,
741                            (unsigned) pkt_len,
742                            (unsigned) tx_id,
743                            (unsigned) tx_last);
744
745                 /*
746                  * Make sure there are enough TX descriptors available to
747                  * transmit the entire packet.
748                  * nb_used better be less than or equal to txq->tx_rs_thresh
749                  */
750                 if (nb_used > txq->nb_tx_free) {
751                         PMD_TX_FREE_LOG(DEBUG,
752                                         "Not enough free TX descriptors "
753                                         "nb_used=%4u nb_free=%4u "
754                                         "(port=%d queue=%d)",
755                                         nb_used, txq->nb_tx_free,
756                                         txq->port_id, txq->queue_id);
757
758                         if (ixgbe_xmit_cleanup(txq) != 0) {
759                                 /* Could not clean any descriptors */
760                                 if (nb_tx == 0)
761                                         return 0;
762                                 goto end_of_tx;
763                         }
764
765                         /* nb_used better be <= txq->tx_rs_thresh */
766                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
767                                 PMD_TX_FREE_LOG(DEBUG,
768                                         "The number of descriptors needed to "
769                                         "transmit the packet exceeds the "
770                                         "RS bit threshold. This will impact "
771                                         "performance."
772                                         "nb_used=%4u nb_free=%4u "
773                                         "tx_rs_thresh=%4u. "
774                                         "(port=%d queue=%d)",
775                                         nb_used, txq->nb_tx_free,
776                                         txq->tx_rs_thresh,
777                                         txq->port_id, txq->queue_id);
778                                 /*
779                                  * Loop here until there are enough TX
780                                  * descriptors or until the ring cannot be
781                                  * cleaned.
782                                  */
783                                 while (nb_used > txq->nb_tx_free) {
784                                         if (ixgbe_xmit_cleanup(txq) != 0) {
785                                                 /*
786                                                  * Could not clean any
787                                                  * descriptors
788                                                  */
789                                                 if (nb_tx == 0)
790                                                         return 0;
791                                                 goto end_of_tx;
792                                         }
793                                 }
794                         }
795                 }
796
797                 /*
798                  * By now there are enough free TX descriptors to transmit
799                  * the packet.
800                  */
801
802                 /*
803                  * Set common flags of all TX Data Descriptors.
804                  *
805                  * The following bits must be set in all Data Descriptors:
806                  *   - IXGBE_ADVTXD_DTYP_DATA
807                  *   - IXGBE_ADVTXD_DCMD_DEXT
808                  *
809                  * The following bits must be set in the first Data Descriptor
810                  * and are ignored in the other ones:
811                  *   - IXGBE_ADVTXD_DCMD_IFCS
812                  *   - IXGBE_ADVTXD_MAC_1588
813                  *   - IXGBE_ADVTXD_DCMD_VLE
814                  *
815                  * The following bits must only be set in the last Data
816                  * Descriptor:
817                  *   - IXGBE_TXD_CMD_EOP
818                  *
819                  * The following bits can be set in any Data Descriptor, but
820                  * are only set in the last Data Descriptor:
821                  *   - IXGBE_TXD_CMD_RS
822                  */
823                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
824                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
825
826 #ifdef RTE_LIBRTE_IEEE1588
827                 if (ol_flags & PKT_TX_IEEE1588_TMST)
828                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
829 #endif
830
831                 olinfo_status = 0;
832                 if (tx_ol_req) {
833
834                         if (ol_flags & PKT_TX_TCP_SEG) {
835                                 /* when TSO is on, paylen in descriptor is the
836                                  * not the packet len but the tcp payload len */
837                                 pkt_len -= (tx_offload.l2_len +
838                                         tx_offload.l3_len + tx_offload.l4_len);
839                         }
840
841                         /*
842                          * Setup the TX Advanced Context Descriptor if required
843                          */
844                         if (new_ctx) {
845                                 volatile struct ixgbe_adv_tx_context_desc *
846                                     ctx_txd;
847
848                                 ctx_txd = (volatile struct
849                                     ixgbe_adv_tx_context_desc *)
850                                     &txr[tx_id];
851
852                                 txn = &sw_ring[txe->next_id];
853                                 rte_prefetch0(&txn->mbuf->pool);
854
855                                 if (txe->mbuf != NULL) {
856                                         rte_pktmbuf_free_seg(txe->mbuf);
857                                         txe->mbuf = NULL;
858                                 }
859
860                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
861                                         tx_offload, &tx_pkt->udata64);
862
863                                 txe->last_id = tx_last;
864                                 tx_id = txe->next_id;
865                                 txe = txn;
866                         }
867
868                         /*
869                          * Setup the TX Advanced Data Descriptor,
870                          * This path will go through
871                          * whatever new/reuse the context descriptor
872                          */
873                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
874                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
875                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
876                 }
877
878                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
879 #ifdef RTE_LIBRTE_SECURITY
880                 if (use_ipsec)
881                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
882 #endif
883
884                 m_seg = tx_pkt;
885                 do {
886                         txd = &txr[tx_id];
887                         txn = &sw_ring[txe->next_id];
888                         rte_prefetch0(&txn->mbuf->pool);
889
890                         if (txe->mbuf != NULL)
891                                 rte_pktmbuf_free_seg(txe->mbuf);
892                         txe->mbuf = m_seg;
893
894                         /*
895                          * Set up Transmit Data Descriptor.
896                          */
897                         slen = m_seg->data_len;
898                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
899                         txd->read.buffer_addr =
900                                 rte_cpu_to_le_64(buf_dma_addr);
901                         txd->read.cmd_type_len =
902                                 rte_cpu_to_le_32(cmd_type_len | slen);
903                         txd->read.olinfo_status =
904                                 rte_cpu_to_le_32(olinfo_status);
905                         txe->last_id = tx_last;
906                         tx_id = txe->next_id;
907                         txe = txn;
908                         m_seg = m_seg->next;
909                 } while (m_seg != NULL);
910
911                 /*
912                  * The last packet data descriptor needs End Of Packet (EOP)
913                  */
914                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
915                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
916                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917
918                 /* Set RS bit only on threshold packets' last descriptor */
919                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
920                         PMD_TX_FREE_LOG(DEBUG,
921                                         "Setting RS bit on TXD id="
922                                         "%4u (port=%d queue=%d)",
923                                         tx_last, txq->port_id, txq->queue_id);
924
925                         cmd_type_len |= IXGBE_TXD_CMD_RS;
926
927                         /* Update txq RS bit counters */
928                         txq->nb_tx_used = 0;
929                         txp = NULL;
930                 } else
931                         txp = txd;
932
933                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
934         }
935
936 end_of_tx:
937         /* set RS on last packet in the burst */
938         if (txp != NULL)
939                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
940
941         rte_wmb();
942
943         /*
944          * Set the Transmit Descriptor Tail (TDT)
945          */
946         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
947                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
948                    (unsigned) tx_id, (unsigned) nb_tx);
949         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
950         txq->tx_tail = tx_id;
951
952         return nb_tx;
953 }
954
955 /*********************************************************************
956  *
957  *  TX prep functions
958  *
959  **********************************************************************/
960 uint16_t
961 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
962 {
963         int i, ret;
964         uint64_t ol_flags;
965         struct rte_mbuf *m;
966         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
967
968         for (i = 0; i < nb_pkts; i++) {
969                 m = tx_pkts[i];
970                 ol_flags = m->ol_flags;
971
972                 /**
973                  * Check if packet meets requirements for number of segments
974                  *
975                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
976                  *       non-TSO
977                  */
978
979                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
980                         rte_errno = EINVAL;
981                         return i;
982                 }
983
984                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
985                         rte_errno = ENOTSUP;
986                         return i;
987                 }
988
989                 /* check the size of packet */
990                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
991                         rte_errno = EINVAL;
992                         return i;
993                 }
994
995 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
996                 ret = rte_validate_tx_offload(m);
997                 if (ret != 0) {
998                         rte_errno = -ret;
999                         return i;
1000                 }
1001 #endif
1002                 ret = rte_net_intel_cksum_prepare(m);
1003                 if (ret != 0) {
1004                         rte_errno = -ret;
1005                         return i;
1006                 }
1007         }
1008
1009         return i;
1010 }
1011
1012 /*********************************************************************
1013  *
1014  *  RX functions
1015  *
1016  **********************************************************************/
1017
1018 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1019 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1020 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1021 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1022 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1023 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1027 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1028 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1029 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1030 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1031 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1035 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1039 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1043 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1047 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1051
1052 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1075
1076 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1099
1100 /**
1101  * Use 2 different table for normal packet and tunnel packet
1102  * to save the space.
1103  */
1104 const uint32_t
1105         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1106         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1107         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1108                 RTE_PTYPE_L3_IPV4,
1109         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1111         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1113         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1115         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4_EXT,
1117         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1119         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1121         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1123         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV6,
1125         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1127         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1129         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1131         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6_EXT,
1133         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1135         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1137         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1138                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1139         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1141                 RTE_PTYPE_INNER_L3_IPV6,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1151         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1163         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1175         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1185                 RTE_PTYPE_L2_ETHER |
1186                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1187                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1188 };
1189
1190 const uint32_t
1191         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1192         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1193                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                 RTE_PTYPE_INNER_L2_ETHER,
1195         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1216                 RTE_PTYPE_INNER_L4_TCP,
1217         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1218                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1220                 RTE_PTYPE_INNER_L4_TCP,
1221         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1222                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1223                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1227                 RTE_PTYPE_INNER_L4_TCP,
1228         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1229                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1230                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1231                 RTE_PTYPE_INNER_L3_IPV4,
1232         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1233                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1234                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1235                 RTE_PTYPE_INNER_L4_UDP,
1236         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1237                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1238                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1239                 RTE_PTYPE_INNER_L4_UDP,
1240         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1241                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1243                 RTE_PTYPE_INNER_L4_SCTP,
1244         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1245                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1246                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1250                 RTE_PTYPE_INNER_L4_UDP,
1251         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1252                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1253                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1254                 RTE_PTYPE_INNER_L4_SCTP,
1255         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1256                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1257                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1258                 RTE_PTYPE_INNER_L3_IPV4,
1259         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1260                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1262                 RTE_PTYPE_INNER_L4_SCTP,
1263         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1266                 RTE_PTYPE_INNER_L4_SCTP,
1267         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1268                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1270                 RTE_PTYPE_INNER_L4_TCP,
1271         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1272                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1273                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1274                 RTE_PTYPE_INNER_L4_UDP,
1275
1276         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1277                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1278                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1279         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1282                 RTE_PTYPE_INNER_L3_IPV4,
1283         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1284                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1287         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1288                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                 RTE_PTYPE_INNER_L3_IPV6,
1291         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1292                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                 RTE_PTYPE_INNER_L3_IPV4,
1295         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1296                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1299         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1300                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                 RTE_PTYPE_INNER_L3_IPV4,
1303         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1304                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1307         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1308                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1309                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1310                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1311         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1312                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                 RTE_PTYPE_INNER_L3_IPV4,
1315         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1316                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1317                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1318                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1319         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1320                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1321                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1322                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1323         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1324                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1327         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1328                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1329                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1330                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1331         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1332                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1333                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1334                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1335         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1336                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1337                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1338                 RTE_PTYPE_INNER_L3_IPV4,
1339         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1340                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1341                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1342                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1343         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1344                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1345                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1346                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1347         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1348                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1349                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1350                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1351         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1352                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1353                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1354                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1355         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1356                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1357                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1358                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1359         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1360                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1361                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1362                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1363         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1364                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1365                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1366                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1367 };
1368
1369 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1370 static inline uint32_t
1371 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1372 {
1373
1374         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1375                 return RTE_PTYPE_UNKNOWN;
1376
1377         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1378
1379         /* For tunnel packet */
1380         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1381                 /* Remove the tunnel bit to save the space. */
1382                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1383                 return ptype_table_tn[pkt_info];
1384         }
1385
1386         /**
1387          * For x550, if it's not tunnel,
1388          * tunnel type bit should be set to 0.
1389          * Reuse 82599's mask.
1390          */
1391         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1392
1393         return ptype_table[pkt_info];
1394 }
1395
1396 static inline uint64_t
1397 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1398 {
1399         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1400                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1401                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1402                 PKT_RX_RSS_HASH, 0, 0, 0,
1403                 0, 0, 0,  PKT_RX_FDIR,
1404         };
1405 #ifdef RTE_LIBRTE_IEEE1588
1406         static uint64_t ip_pkt_etqf_map[8] = {
1407                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1408                 0, 0, 0, 0,
1409         };
1410
1411         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1412                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1413                                 ip_rss_types_map[pkt_info & 0XF];
1414         else
1415                 return ip_rss_types_map[pkt_info & 0XF];
1416 #else
1417         return ip_rss_types_map[pkt_info & 0XF];
1418 #endif
1419 }
1420
1421 static inline uint64_t
1422 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1423 {
1424         uint64_t pkt_flags;
1425
1426         /*
1427          * Check if VLAN present only.
1428          * Do not check whether L3/L4 rx checksum done by NIC or not,
1429          * That can be found from rte_eth_rxmode.offloads flag
1430          */
1431         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1432
1433 #ifdef RTE_LIBRTE_IEEE1588
1434         if (rx_status & IXGBE_RXD_STAT_TMST)
1435                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1436 #endif
1437         return pkt_flags;
1438 }
1439
1440 static inline uint64_t
1441 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1442 {
1443         uint64_t pkt_flags;
1444
1445         /*
1446          * Bit 31: IPE, IPv4 checksum error
1447          * Bit 30: L4I, L4I integrity error
1448          */
1449         static uint64_t error_to_pkt_flags_map[4] = {
1450                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1451                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1452                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1453                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1454         };
1455         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1456                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1457
1458         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1459             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1460                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1461         }
1462
1463 #ifdef RTE_LIBRTE_SECURITY
1464         if (rx_status & IXGBE_RXD_STAT_SECP) {
1465                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1466                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1467                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1468         }
1469 #endif
1470
1471         return pkt_flags;
1472 }
1473
1474 /*
1475  * LOOK_AHEAD defines how many desc statuses to check beyond the
1476  * current descriptor.
1477  * It must be a pound define for optimal performance.
1478  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1479  * function only works with LOOK_AHEAD=8.
1480  */
1481 #define LOOK_AHEAD 8
1482 #if (LOOK_AHEAD != 8)
1483 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1484 #endif
1485 static inline int
1486 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1487 {
1488         volatile union ixgbe_adv_rx_desc *rxdp;
1489         struct ixgbe_rx_entry *rxep;
1490         struct rte_mbuf *mb;
1491         uint16_t pkt_len;
1492         uint64_t pkt_flags;
1493         int nb_dd;
1494         uint32_t s[LOOK_AHEAD];
1495         uint32_t pkt_info[LOOK_AHEAD];
1496         int i, j, nb_rx = 0;
1497         uint32_t status;
1498         uint64_t vlan_flags = rxq->vlan_flags;
1499
1500         /* get references to current descriptor and S/W ring entry */
1501         rxdp = &rxq->rx_ring[rxq->rx_tail];
1502         rxep = &rxq->sw_ring[rxq->rx_tail];
1503
1504         status = rxdp->wb.upper.status_error;
1505         /* check to make sure there is at least 1 packet to receive */
1506         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1507                 return 0;
1508
1509         /*
1510          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1511          * reference packets that are ready to be received.
1512          */
1513         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1514              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1515                 /* Read desc statuses backwards to avoid race condition */
1516                 for (j = 0; j < LOOK_AHEAD; j++)
1517                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1518
1519                 rte_smp_rmb();
1520
1521                 /* Compute how many status bits were set */
1522                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1523                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1524                         ;
1525
1526                 for (j = 0; j < nb_dd; j++)
1527                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1528                                                        lo_dword.data);
1529
1530                 nb_rx += nb_dd;
1531
1532                 /* Translate descriptor info to mbuf format */
1533                 for (j = 0; j < nb_dd; ++j) {
1534                         mb = rxep[j].mbuf;
1535                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1536                                   rxq->crc_len;
1537                         mb->data_len = pkt_len;
1538                         mb->pkt_len = pkt_len;
1539                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1540
1541                         /* convert descriptor fields to rte mbuf flags */
1542                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1543                                 vlan_flags);
1544                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1545                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1546                                         ((uint16_t)pkt_info[j]);
1547                         mb->ol_flags = pkt_flags;
1548                         mb->packet_type =
1549                                 ixgbe_rxd_pkt_info_to_pkt_type
1550                                         (pkt_info[j], rxq->pkt_type_mask);
1551
1552                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1553                                 mb->hash.rss = rte_le_to_cpu_32(
1554                                     rxdp[j].wb.lower.hi_dword.rss);
1555                         else if (pkt_flags & PKT_RX_FDIR) {
1556                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1557                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1558                                     IXGBE_ATR_HASH_MASK;
1559                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1560                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1561                         }
1562                 }
1563
1564                 /* Move mbuf pointers from the S/W ring to the stage */
1565                 for (j = 0; j < LOOK_AHEAD; ++j) {
1566                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1567                 }
1568
1569                 /* stop if all requested packets could not be received */
1570                 if (nb_dd != LOOK_AHEAD)
1571                         break;
1572         }
1573
1574         /* clear software ring entries so we can cleanup correctly */
1575         for (i = 0; i < nb_rx; ++i) {
1576                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1577         }
1578
1579
1580         return nb_rx;
1581 }
1582
1583 static inline int
1584 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1585 {
1586         volatile union ixgbe_adv_rx_desc *rxdp;
1587         struct ixgbe_rx_entry *rxep;
1588         struct rte_mbuf *mb;
1589         uint16_t alloc_idx;
1590         __le64 dma_addr;
1591         int diag, i;
1592
1593         /* allocate buffers in bulk directly into the S/W ring */
1594         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1595         rxep = &rxq->sw_ring[alloc_idx];
1596         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1597                                     rxq->rx_free_thresh);
1598         if (unlikely(diag != 0))
1599                 return -ENOMEM;
1600
1601         rxdp = &rxq->rx_ring[alloc_idx];
1602         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1603                 /* populate the static rte mbuf fields */
1604                 mb = rxep[i].mbuf;
1605                 if (reset_mbuf) {
1606                         mb->port = rxq->port_id;
1607                 }
1608
1609                 rte_mbuf_refcnt_set(mb, 1);
1610                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1611
1612                 /* populate the descriptors */
1613                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1614                 rxdp[i].read.hdr_addr = 0;
1615                 rxdp[i].read.pkt_addr = dma_addr;
1616         }
1617
1618         /* update state of internal queue structure */
1619         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1620         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1621                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1622
1623         /* no errors */
1624         return 0;
1625 }
1626
1627 static inline uint16_t
1628 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1629                          uint16_t nb_pkts)
1630 {
1631         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1632         int i;
1633
1634         /* how many packets are ready to return? */
1635         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1636
1637         /* copy mbuf pointers to the application's packet list */
1638         for (i = 0; i < nb_pkts; ++i)
1639                 rx_pkts[i] = stage[i];
1640
1641         /* update internal queue state */
1642         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1643         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1644
1645         return nb_pkts;
1646 }
1647
1648 static inline uint16_t
1649 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1650              uint16_t nb_pkts)
1651 {
1652         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1653         uint16_t nb_rx = 0;
1654
1655         /* Any previously recv'd pkts will be returned from the Rx stage */
1656         if (rxq->rx_nb_avail)
1657                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1658
1659         /* Scan the H/W ring for packets to receive */
1660         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1661
1662         /* update internal queue state */
1663         rxq->rx_next_avail = 0;
1664         rxq->rx_nb_avail = nb_rx;
1665         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1666
1667         /* if required, allocate new buffers to replenish descriptors */
1668         if (rxq->rx_tail > rxq->rx_free_trigger) {
1669                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1670
1671                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1672                         int i, j;
1673
1674                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1675                                    "queue_id=%u", (unsigned) rxq->port_id,
1676                                    (unsigned) rxq->queue_id);
1677
1678                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1679                                 rxq->rx_free_thresh;
1680
1681                         /*
1682                          * Need to rewind any previous receives if we cannot
1683                          * allocate new buffers to replenish the old ones.
1684                          */
1685                         rxq->rx_nb_avail = 0;
1686                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1687                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1688                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1689
1690                         return 0;
1691                 }
1692
1693                 /* update tail pointer */
1694                 rte_wmb();
1695                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1696                                             cur_free_trigger);
1697         }
1698
1699         if (rxq->rx_tail >= rxq->nb_rx_desc)
1700                 rxq->rx_tail = 0;
1701
1702         /* received any packets this loop? */
1703         if (rxq->rx_nb_avail)
1704                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1705
1706         return 0;
1707 }
1708
1709 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1710 uint16_t
1711 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1712                            uint16_t nb_pkts)
1713 {
1714         uint16_t nb_rx;
1715
1716         if (unlikely(nb_pkts == 0))
1717                 return 0;
1718
1719         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1720                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1721
1722         /* request is relatively large, chunk it up */
1723         nb_rx = 0;
1724         while (nb_pkts) {
1725                 uint16_t ret, n;
1726
1727                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1728                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1729                 nb_rx = (uint16_t)(nb_rx + ret);
1730                 nb_pkts = (uint16_t)(nb_pkts - ret);
1731                 if (ret < n)
1732                         break;
1733         }
1734
1735         return nb_rx;
1736 }
1737
1738 uint16_t
1739 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1740                 uint16_t nb_pkts)
1741 {
1742         struct ixgbe_rx_queue *rxq;
1743         volatile union ixgbe_adv_rx_desc *rx_ring;
1744         volatile union ixgbe_adv_rx_desc *rxdp;
1745         struct ixgbe_rx_entry *sw_ring;
1746         struct ixgbe_rx_entry *rxe;
1747         struct rte_mbuf *rxm;
1748         struct rte_mbuf *nmb;
1749         union ixgbe_adv_rx_desc rxd;
1750         uint64_t dma_addr;
1751         uint32_t staterr;
1752         uint32_t pkt_info;
1753         uint16_t pkt_len;
1754         uint16_t rx_id;
1755         uint16_t nb_rx;
1756         uint16_t nb_hold;
1757         uint64_t pkt_flags;
1758         uint64_t vlan_flags;
1759
1760         nb_rx = 0;
1761         nb_hold = 0;
1762         rxq = rx_queue;
1763         rx_id = rxq->rx_tail;
1764         rx_ring = rxq->rx_ring;
1765         sw_ring = rxq->sw_ring;
1766         vlan_flags = rxq->vlan_flags;
1767         while (nb_rx < nb_pkts) {
1768                 /*
1769                  * The order of operations here is important as the DD status
1770                  * bit must not be read after any other descriptor fields.
1771                  * rx_ring and rxdp are pointing to volatile data so the order
1772                  * of accesses cannot be reordered by the compiler. If they were
1773                  * not volatile, they could be reordered which could lead to
1774                  * using invalid descriptor fields when read from rxd.
1775                  */
1776                 rxdp = &rx_ring[rx_id];
1777                 staterr = rxdp->wb.upper.status_error;
1778                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1779                         break;
1780                 rxd = *rxdp;
1781
1782                 /*
1783                  * End of packet.
1784                  *
1785                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1786                  * is likely to be invalid and to be dropped by the various
1787                  * validation checks performed by the network stack.
1788                  *
1789                  * Allocate a new mbuf to replenish the RX ring descriptor.
1790                  * If the allocation fails:
1791                  *    - arrange for that RX descriptor to be the first one
1792                  *      being parsed the next time the receive function is
1793                  *      invoked [on the same queue].
1794                  *
1795                  *    - Stop parsing the RX ring and return immediately.
1796                  *
1797                  * This policy do not drop the packet received in the RX
1798                  * descriptor for which the allocation of a new mbuf failed.
1799                  * Thus, it allows that packet to be later retrieved if
1800                  * mbuf have been freed in the mean time.
1801                  * As a side effect, holding RX descriptors instead of
1802                  * systematically giving them back to the NIC may lead to
1803                  * RX ring exhaustion situations.
1804                  * However, the NIC can gracefully prevent such situations
1805                  * to happen by sending specific "back-pressure" flow control
1806                  * frames to its peer(s).
1807                  */
1808                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1809                            "ext_err_stat=0x%08x pkt_len=%u",
1810                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1811                            (unsigned) rx_id, (unsigned) staterr,
1812                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1813
1814                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1815                 if (nmb == NULL) {
1816                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1817                                    "queue_id=%u", (unsigned) rxq->port_id,
1818                                    (unsigned) rxq->queue_id);
1819                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1820                         break;
1821                 }
1822
1823                 nb_hold++;
1824                 rxe = &sw_ring[rx_id];
1825                 rx_id++;
1826                 if (rx_id == rxq->nb_rx_desc)
1827                         rx_id = 0;
1828
1829                 /* Prefetch next mbuf while processing current one. */
1830                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1831
1832                 /*
1833                  * When next RX descriptor is on a cache-line boundary,
1834                  * prefetch the next 4 RX descriptors and the next 8 pointers
1835                  * to mbufs.
1836                  */
1837                 if ((rx_id & 0x3) == 0) {
1838                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1839                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1840                 }
1841
1842                 rxm = rxe->mbuf;
1843                 rxe->mbuf = nmb;
1844                 dma_addr =
1845                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1846                 rxdp->read.hdr_addr = 0;
1847                 rxdp->read.pkt_addr = dma_addr;
1848
1849                 /*
1850                  * Initialize the returned mbuf.
1851                  * 1) setup generic mbuf fields:
1852                  *    - number of segments,
1853                  *    - next segment,
1854                  *    - packet length,
1855                  *    - RX port identifier.
1856                  * 2) integrate hardware offload data, if any:
1857                  *    - RSS flag & hash,
1858                  *    - IP checksum flag,
1859                  *    - VLAN TCI, if any,
1860                  *    - error flags.
1861                  */
1862                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1863                                       rxq->crc_len);
1864                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1865                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1866                 rxm->nb_segs = 1;
1867                 rxm->next = NULL;
1868                 rxm->pkt_len = pkt_len;
1869                 rxm->data_len = pkt_len;
1870                 rxm->port = rxq->port_id;
1871
1872                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1873                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1874                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1875
1876                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1877                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1878                 pkt_flags = pkt_flags |
1879                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1880                 rxm->ol_flags = pkt_flags;
1881                 rxm->packet_type =
1882                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1883                                                        rxq->pkt_type_mask);
1884
1885                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1886                         rxm->hash.rss = rte_le_to_cpu_32(
1887                                                 rxd.wb.lower.hi_dword.rss);
1888                 else if (pkt_flags & PKT_RX_FDIR) {
1889                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1890                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1891                                         IXGBE_ATR_HASH_MASK;
1892                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1893                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1894                 }
1895                 /*
1896                  * Store the mbuf address into the next entry of the array
1897                  * of returned packets.
1898                  */
1899                 rx_pkts[nb_rx++] = rxm;
1900         }
1901         rxq->rx_tail = rx_id;
1902
1903         /*
1904          * If the number of free RX descriptors is greater than the RX free
1905          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1906          * register.
1907          * Update the RDT with the value of the last processed RX descriptor
1908          * minus 1, to guarantee that the RDT register is never equal to the
1909          * RDH register, which creates a "full" ring situtation from the
1910          * hardware point of view...
1911          */
1912         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1913         if (nb_hold > rxq->rx_free_thresh) {
1914                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1915                            "nb_hold=%u nb_rx=%u",
1916                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1917                            (unsigned) rx_id, (unsigned) nb_hold,
1918                            (unsigned) nb_rx);
1919                 rx_id = (uint16_t) ((rx_id == 0) ?
1920                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1921                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1922                 nb_hold = 0;
1923         }
1924         rxq->nb_rx_hold = nb_hold;
1925         return nb_rx;
1926 }
1927
1928 /**
1929  * Detect an RSC descriptor.
1930  */
1931 static inline uint32_t
1932 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1933 {
1934         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1935                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1936 }
1937
1938 /**
1939  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1940  *
1941  * Fill the following info in the HEAD buffer of the Rx cluster:
1942  *    - RX port identifier
1943  *    - hardware offload data, if any:
1944  *      - RSS flag & hash
1945  *      - IP checksum flag
1946  *      - VLAN TCI, if any
1947  *      - error flags
1948  * @head HEAD of the packet cluster
1949  * @desc HW descriptor to get data from
1950  * @rxq Pointer to the Rx queue
1951  */
1952 static inline void
1953 ixgbe_fill_cluster_head_buf(
1954         struct rte_mbuf *head,
1955         union ixgbe_adv_rx_desc *desc,
1956         struct ixgbe_rx_queue *rxq,
1957         uint32_t staterr)
1958 {
1959         uint32_t pkt_info;
1960         uint64_t pkt_flags;
1961
1962         head->port = rxq->port_id;
1963
1964         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1965          * set in the pkt_flags field.
1966          */
1967         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1968         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1969         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1970         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1971         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1972         head->ol_flags = pkt_flags;
1973         head->packet_type =
1974                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1975
1976         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1977                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1978         else if (pkt_flags & PKT_RX_FDIR) {
1979                 head->hash.fdir.hash =
1980                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1981                                                           & IXGBE_ATR_HASH_MASK;
1982                 head->hash.fdir.id =
1983                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1984         }
1985 }
1986
1987 /**
1988  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1989  *
1990  * @rx_queue Rx queue handle
1991  * @rx_pkts table of received packets
1992  * @nb_pkts size of rx_pkts table
1993  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1994  *
1995  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1996  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1997  *
1998  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1999  * 1) When non-EOP RSC completion arrives:
2000  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2001  *       segment's data length.
2002  *    b) Set the "next" pointer of the current segment to point to the segment
2003  *       at the NEXTP index.
2004  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2005  *       in the sw_rsc_ring.
2006  * 2) When EOP arrives we just update the cluster's total length and offload
2007  *    flags and deliver the cluster up to the upper layers. In our case - put it
2008  *    in the rx_pkts table.
2009  *
2010  * Returns the number of received packets/clusters (according to the "bulk
2011  * receive" interface).
2012  */
2013 static inline uint16_t
2014 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2015                     bool bulk_alloc)
2016 {
2017         struct ixgbe_rx_queue *rxq = rx_queue;
2018         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2019         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2020         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2021         uint16_t rx_id = rxq->rx_tail;
2022         uint16_t nb_rx = 0;
2023         uint16_t nb_hold = rxq->nb_rx_hold;
2024         uint16_t prev_id = rxq->rx_tail;
2025
2026         while (nb_rx < nb_pkts) {
2027                 bool eop;
2028                 struct ixgbe_rx_entry *rxe;
2029                 struct ixgbe_scattered_rx_entry *sc_entry;
2030                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2031                 struct ixgbe_rx_entry *next_rxe = NULL;
2032                 struct rte_mbuf *first_seg;
2033                 struct rte_mbuf *rxm;
2034                 struct rte_mbuf *nmb = NULL;
2035                 union ixgbe_adv_rx_desc rxd;
2036                 uint16_t data_len;
2037                 uint16_t next_id;
2038                 volatile union ixgbe_adv_rx_desc *rxdp;
2039                 uint32_t staterr;
2040
2041 next_desc:
2042                 /*
2043                  * The code in this whole file uses the volatile pointer to
2044                  * ensure the read ordering of the status and the rest of the
2045                  * descriptor fields (on the compiler level only!!!). This is so
2046                  * UGLY - why not to just use the compiler barrier instead? DPDK
2047                  * even has the rte_compiler_barrier() for that.
2048                  *
2049                  * But most importantly this is just wrong because this doesn't
2050                  * ensure memory ordering in a general case at all. For
2051                  * instance, DPDK is supposed to work on Power CPUs where
2052                  * compiler barrier may just not be enough!
2053                  *
2054                  * I tried to write only this function properly to have a
2055                  * starting point (as a part of an LRO/RSC series) but the
2056                  * compiler cursed at me when I tried to cast away the
2057                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2058                  * keeping it the way it is for now.
2059                  *
2060                  * The code in this file is broken in so many other places and
2061                  * will just not work on a big endian CPU anyway therefore the
2062                  * lines below will have to be revisited together with the rest
2063                  * of the ixgbe PMD.
2064                  *
2065                  * TODO:
2066                  *    - Get rid of "volatile" and let the compiler do its job.
2067                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2068                  *      memory ordering below.
2069                  */
2070                 rxdp = &rx_ring[rx_id];
2071                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2072
2073                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2074                         break;
2075
2076                 rxd = *rxdp;
2077
2078                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2079                                   "staterr=0x%x data_len=%u",
2080                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2081                            rte_le_to_cpu_16(rxd.wb.upper.length));
2082
2083                 if (!bulk_alloc) {
2084                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2085                         if (nmb == NULL) {
2086                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2087                                                   "port_id=%u queue_id=%u",
2088                                            rxq->port_id, rxq->queue_id);
2089
2090                                 rte_eth_devices[rxq->port_id].data->
2091                                                         rx_mbuf_alloc_failed++;
2092                                 break;
2093                         }
2094                 } else if (nb_hold > rxq->rx_free_thresh) {
2095                         uint16_t next_rdt = rxq->rx_free_trigger;
2096
2097                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2098                                 rte_wmb();
2099                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2100                                                         rxq->rdt_reg_addr,
2101                                                         next_rdt);
2102                                 nb_hold -= rxq->rx_free_thresh;
2103                         } else {
2104                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2105                                                   "port_id=%u queue_id=%u",
2106                                            rxq->port_id, rxq->queue_id);
2107
2108                                 rte_eth_devices[rxq->port_id].data->
2109                                                         rx_mbuf_alloc_failed++;
2110                                 break;
2111                         }
2112                 }
2113
2114                 nb_hold++;
2115                 rxe = &sw_ring[rx_id];
2116                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2117
2118                 next_id = rx_id + 1;
2119                 if (next_id == rxq->nb_rx_desc)
2120                         next_id = 0;
2121
2122                 /* Prefetch next mbuf while processing current one. */
2123                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2124
2125                 /*
2126                  * When next RX descriptor is on a cache-line boundary,
2127                  * prefetch the next 4 RX descriptors and the next 4 pointers
2128                  * to mbufs.
2129                  */
2130                 if ((next_id & 0x3) == 0) {
2131                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2132                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2133                 }
2134
2135                 rxm = rxe->mbuf;
2136
2137                 if (!bulk_alloc) {
2138                         __le64 dma =
2139                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2140                         /*
2141                          * Update RX descriptor with the physical address of the
2142                          * new data buffer of the new allocated mbuf.
2143                          */
2144                         rxe->mbuf = nmb;
2145
2146                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2147                         rxdp->read.hdr_addr = 0;
2148                         rxdp->read.pkt_addr = dma;
2149                 } else
2150                         rxe->mbuf = NULL;
2151
2152                 /*
2153                  * Set data length & data buffer address of mbuf.
2154                  */
2155                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2156                 rxm->data_len = data_len;
2157
2158                 if (!eop) {
2159                         uint16_t nextp_id;
2160                         /*
2161                          * Get next descriptor index:
2162                          *  - For RSC it's in the NEXTP field.
2163                          *  - For a scattered packet - it's just a following
2164                          *    descriptor.
2165                          */
2166                         if (ixgbe_rsc_count(&rxd))
2167                                 nextp_id =
2168                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2169                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2170                         else
2171                                 nextp_id = next_id;
2172
2173                         next_sc_entry = &sw_sc_ring[nextp_id];
2174                         next_rxe = &sw_ring[nextp_id];
2175                         rte_ixgbe_prefetch(next_rxe);
2176                 }
2177
2178                 sc_entry = &sw_sc_ring[rx_id];
2179                 first_seg = sc_entry->fbuf;
2180                 sc_entry->fbuf = NULL;
2181
2182                 /*
2183                  * If this is the first buffer of the received packet,
2184                  * set the pointer to the first mbuf of the packet and
2185                  * initialize its context.
2186                  * Otherwise, update the total length and the number of segments
2187                  * of the current scattered packet, and update the pointer to
2188                  * the last mbuf of the current packet.
2189                  */
2190                 if (first_seg == NULL) {
2191                         first_seg = rxm;
2192                         first_seg->pkt_len = data_len;
2193                         first_seg->nb_segs = 1;
2194                 } else {
2195                         first_seg->pkt_len += data_len;
2196                         first_seg->nb_segs++;
2197                 }
2198
2199                 prev_id = rx_id;
2200                 rx_id = next_id;
2201
2202                 /*
2203                  * If this is not the last buffer of the received packet, update
2204                  * the pointer to the first mbuf at the NEXTP entry in the
2205                  * sw_sc_ring and continue to parse the RX ring.
2206                  */
2207                 if (!eop && next_rxe) {
2208                         rxm->next = next_rxe->mbuf;
2209                         next_sc_entry->fbuf = first_seg;
2210                         goto next_desc;
2211                 }
2212
2213                 /* Initialize the first mbuf of the returned packet */
2214                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2215
2216                 /*
2217                  * Deal with the case, when HW CRC srip is disabled.
2218                  * That can't happen when LRO is enabled, but still could
2219                  * happen for scattered RX mode.
2220                  */
2221                 first_seg->pkt_len -= rxq->crc_len;
2222                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2223                         struct rte_mbuf *lp;
2224
2225                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2226                                 ;
2227
2228                         first_seg->nb_segs--;
2229                         lp->data_len -= rxq->crc_len - rxm->data_len;
2230                         lp->next = NULL;
2231                         rte_pktmbuf_free_seg(rxm);
2232                 } else
2233                         rxm->data_len -= rxq->crc_len;
2234
2235                 /* Prefetch data of first segment, if configured to do so. */
2236                 rte_packet_prefetch((char *)first_seg->buf_addr +
2237                         first_seg->data_off);
2238
2239                 /*
2240                  * Store the mbuf address into the next entry of the array
2241                  * of returned packets.
2242                  */
2243                 rx_pkts[nb_rx++] = first_seg;
2244         }
2245
2246         /*
2247          * Record index of the next RX descriptor to probe.
2248          */
2249         rxq->rx_tail = rx_id;
2250
2251         /*
2252          * If the number of free RX descriptors is greater than the RX free
2253          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2254          * register.
2255          * Update the RDT with the value of the last processed RX descriptor
2256          * minus 1, to guarantee that the RDT register is never equal to the
2257          * RDH register, which creates a "full" ring situtation from the
2258          * hardware point of view...
2259          */
2260         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2261                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2262                            "nb_hold=%u nb_rx=%u",
2263                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2264
2265                 rte_wmb();
2266                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2267                 nb_hold = 0;
2268         }
2269
2270         rxq->nb_rx_hold = nb_hold;
2271         return nb_rx;
2272 }
2273
2274 uint16_t
2275 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2276                                  uint16_t nb_pkts)
2277 {
2278         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2279 }
2280
2281 uint16_t
2282 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2283                                uint16_t nb_pkts)
2284 {
2285         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2286 }
2287
2288 /*********************************************************************
2289  *
2290  *  Queue management functions
2291  *
2292  **********************************************************************/
2293
2294 static void __rte_cold
2295 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2296 {
2297         unsigned i;
2298
2299         if (txq->sw_ring != NULL) {
2300                 for (i = 0; i < txq->nb_tx_desc; i++) {
2301                         if (txq->sw_ring[i].mbuf != NULL) {
2302                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2303                                 txq->sw_ring[i].mbuf = NULL;
2304                         }
2305                 }
2306         }
2307 }
2308
2309 static int
2310 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2311 {
2312         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2313         uint16_t i, tx_last, tx_id;
2314         uint16_t nb_tx_free_last;
2315         uint16_t nb_tx_to_clean;
2316         uint32_t pkt_cnt;
2317
2318         /* Start free mbuf from the next of tx_tail */
2319         tx_last = txq->tx_tail;
2320         tx_id  = swr_ring[tx_last].next_id;
2321
2322         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2323                 return 0;
2324
2325         nb_tx_to_clean = txq->nb_tx_free;
2326         nb_tx_free_last = txq->nb_tx_free;
2327         if (!free_cnt)
2328                 free_cnt = txq->nb_tx_desc;
2329
2330         /* Loop through swr_ring to count the amount of
2331          * freeable mubfs and packets.
2332          */
2333         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2334                 for (i = 0; i < nb_tx_to_clean &&
2335                         pkt_cnt < free_cnt &&
2336                         tx_id != tx_last; i++) {
2337                         if (swr_ring[tx_id].mbuf != NULL) {
2338                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2339                                 swr_ring[tx_id].mbuf = NULL;
2340
2341                                 /*
2342                                  * last segment in the packet,
2343                                  * increment packet count
2344                                  */
2345                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2346                         }
2347
2348                         tx_id = swr_ring[tx_id].next_id;
2349                 }
2350
2351                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2352                         txq->nb_tx_free || tx_id == tx_last)
2353                         break;
2354
2355                 if (pkt_cnt < free_cnt) {
2356                         if (ixgbe_xmit_cleanup(txq))
2357                                 break;
2358
2359                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2360                         nb_tx_free_last = txq->nb_tx_free;
2361                 }
2362         }
2363
2364         return (int)pkt_cnt;
2365 }
2366
2367 static int
2368 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2369                         uint32_t free_cnt)
2370 {
2371         int i, n, cnt;
2372
2373         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2374                 free_cnt = txq->nb_tx_desc;
2375
2376         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2377
2378         for (i = 0; i < cnt; i += n) {
2379                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2380                         break;
2381
2382                 n = ixgbe_tx_free_bufs(txq);
2383
2384                 if (n == 0)
2385                         break;
2386         }
2387
2388         return i;
2389 }
2390
2391 static int
2392 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2393                         uint32_t free_cnt __rte_unused)
2394 {
2395         return -ENOTSUP;
2396 }
2397
2398 int
2399 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2400 {
2401         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2402         if (txq->offloads == 0 &&
2403 #ifdef RTE_LIBRTE_SECURITY
2404                         !(txq->using_ipsec) &&
2405 #endif
2406                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2407                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2408                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2409                                         txq->sw_ring_v != NULL)) {
2410                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2411                 } else {
2412                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2413                 }
2414         }
2415
2416         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2417 }
2418
2419 static void __rte_cold
2420 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2421 {
2422         if (txq != NULL &&
2423             txq->sw_ring != NULL)
2424                 rte_free(txq->sw_ring);
2425 }
2426
2427 static void __rte_cold
2428 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2429 {
2430         if (txq != NULL && txq->ops != NULL) {
2431                 txq->ops->release_mbufs(txq);
2432                 txq->ops->free_swring(txq);
2433                 rte_free(txq);
2434         }
2435 }
2436
2437 void __rte_cold
2438 ixgbe_dev_tx_queue_release(void *txq)
2439 {
2440         ixgbe_tx_queue_release(txq);
2441 }
2442
2443 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2444 static void __rte_cold
2445 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2446 {
2447         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2448         struct ixgbe_tx_entry *txe = txq->sw_ring;
2449         uint16_t prev, i;
2450
2451         /* Zero out HW ring memory */
2452         for (i = 0; i < txq->nb_tx_desc; i++) {
2453                 txq->tx_ring[i] = zeroed_desc;
2454         }
2455
2456         /* Initialize SW ring entries */
2457         prev = (uint16_t) (txq->nb_tx_desc - 1);
2458         for (i = 0; i < txq->nb_tx_desc; i++) {
2459                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2460
2461                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2462                 txe[i].mbuf = NULL;
2463                 txe[i].last_id = i;
2464                 txe[prev].next_id = i;
2465                 prev = i;
2466         }
2467
2468         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2469         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2470
2471         txq->tx_tail = 0;
2472         txq->nb_tx_used = 0;
2473         /*
2474          * Always allow 1 descriptor to be un-allocated to avoid
2475          * a H/W race condition
2476          */
2477         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2478         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2479         txq->ctx_curr = 0;
2480         memset((void *)&txq->ctx_cache, 0,
2481                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2482 }
2483
2484 static const struct ixgbe_txq_ops def_txq_ops = {
2485         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2486         .free_swring = ixgbe_tx_free_swring,
2487         .reset = ixgbe_reset_tx_queue,
2488 };
2489
2490 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2491  * the queue parameters. Used in tx_queue_setup by primary process and then
2492  * in dev_init by secondary process when attaching to an existing ethdev.
2493  */
2494 void __rte_cold
2495 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2496 {
2497         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2498         if ((txq->offloads == 0) &&
2499 #ifdef RTE_LIBRTE_SECURITY
2500                         !(txq->using_ipsec) &&
2501 #endif
2502                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2503                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2504                 dev->tx_pkt_prepare = NULL;
2505                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2506                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2507                                         ixgbe_txq_vec_setup(txq) == 0)) {
2508                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2509                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2510                 } else
2511                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2512         } else {
2513                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2514                 PMD_INIT_LOG(DEBUG,
2515                                 " - offloads = 0x%" PRIx64,
2516                                 txq->offloads);
2517                 PMD_INIT_LOG(DEBUG,
2518                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2519                                 (unsigned long)txq->tx_rs_thresh,
2520                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2521                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2522                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2523         }
2524 }
2525
2526 uint64_t
2527 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2528 {
2529         RTE_SET_USED(dev);
2530
2531         return 0;
2532 }
2533
2534 uint64_t
2535 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2536 {
2537         uint64_t tx_offload_capa;
2538         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2539
2540         tx_offload_capa =
2541                 DEV_TX_OFFLOAD_VLAN_INSERT |
2542                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2543                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2544                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2545                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2546                 DEV_TX_OFFLOAD_TCP_TSO     |
2547                 DEV_TX_OFFLOAD_MULTI_SEGS;
2548
2549         if (hw->mac.type == ixgbe_mac_82599EB ||
2550             hw->mac.type == ixgbe_mac_X540)
2551                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2552
2553         if (hw->mac.type == ixgbe_mac_X550 ||
2554             hw->mac.type == ixgbe_mac_X550EM_x ||
2555             hw->mac.type == ixgbe_mac_X550EM_a)
2556                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2557
2558 #ifdef RTE_LIBRTE_SECURITY
2559         if (dev->security_ctx)
2560                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2561 #endif
2562         return tx_offload_capa;
2563 }
2564
2565 int __rte_cold
2566 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2567                          uint16_t queue_idx,
2568                          uint16_t nb_desc,
2569                          unsigned int socket_id,
2570                          const struct rte_eth_txconf *tx_conf)
2571 {
2572         const struct rte_memzone *tz;
2573         struct ixgbe_tx_queue *txq;
2574         struct ixgbe_hw     *hw;
2575         uint16_t tx_rs_thresh, tx_free_thresh;
2576         uint64_t offloads;
2577
2578         PMD_INIT_FUNC_TRACE();
2579         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2580
2581         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2582
2583         /*
2584          * Validate number of transmit descriptors.
2585          * It must not exceed hardware maximum, and must be multiple
2586          * of IXGBE_ALIGN.
2587          */
2588         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2589                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2590                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2591                 return -EINVAL;
2592         }
2593
2594         /*
2595          * The following two parameters control the setting of the RS bit on
2596          * transmit descriptors.
2597          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2598          * descriptors have been used.
2599          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2600          * descriptors are used or if the number of descriptors required
2601          * to transmit a packet is greater than the number of free TX
2602          * descriptors.
2603          * The following constraints must be satisfied:
2604          *  tx_rs_thresh must be greater than 0.
2605          *  tx_rs_thresh must be less than the size of the ring minus 2.
2606          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2607          *  tx_rs_thresh must be a divisor of the ring size.
2608          *  tx_free_thresh must be greater than 0.
2609          *  tx_free_thresh must be less than the size of the ring minus 3.
2610          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2611          * One descriptor in the TX ring is used as a sentinel to avoid a
2612          * H/W race condition, hence the maximum threshold constraints.
2613          * When set to zero use default values.
2614          */
2615         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2616                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2617         /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */
2618         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2619                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2620         if (tx_conf->tx_rs_thresh > 0)
2621                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2622         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2623                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2624                              "exceed nb_desc. (tx_rs_thresh=%u "
2625                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2626                              (unsigned int)tx_rs_thresh,
2627                              (unsigned int)tx_free_thresh,
2628                              (unsigned int)nb_desc,
2629                              (int)dev->data->port_id,
2630                              (int)queue_idx);
2631                 return -(EINVAL);
2632         }
2633         if (tx_rs_thresh >= (nb_desc - 2)) {
2634                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2635                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2636                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2637                         (int)dev->data->port_id, (int)queue_idx);
2638                 return -(EINVAL);
2639         }
2640         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2641                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2642                         "(tx_rs_thresh=%u port=%d queue=%d)",
2643                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2644                         (int)dev->data->port_id, (int)queue_idx);
2645                 return -(EINVAL);
2646         }
2647         if (tx_free_thresh >= (nb_desc - 3)) {
2648                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2649                              "tx_free_thresh must be less than the number of "
2650                              "TX descriptors minus 3. (tx_free_thresh=%u "
2651                              "port=%d queue=%d)",
2652                              (unsigned int)tx_free_thresh,
2653                              (int)dev->data->port_id, (int)queue_idx);
2654                 return -(EINVAL);
2655         }
2656         if (tx_rs_thresh > tx_free_thresh) {
2657                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2658                              "tx_free_thresh. (tx_free_thresh=%u "
2659                              "tx_rs_thresh=%u port=%d queue=%d)",
2660                              (unsigned int)tx_free_thresh,
2661                              (unsigned int)tx_rs_thresh,
2662                              (int)dev->data->port_id,
2663                              (int)queue_idx);
2664                 return -(EINVAL);
2665         }
2666         if ((nb_desc % tx_rs_thresh) != 0) {
2667                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2668                              "number of TX descriptors. (tx_rs_thresh=%u "
2669                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2670                              (int)dev->data->port_id, (int)queue_idx);
2671                 return -(EINVAL);
2672         }
2673
2674         /*
2675          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2676          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2677          * by the NIC and all descriptors are written back after the NIC
2678          * accumulates WTHRESH descriptors.
2679          */
2680         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2681                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2682                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2683                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2684                              (int)dev->data->port_id, (int)queue_idx);
2685                 return -(EINVAL);
2686         }
2687
2688         /* Free memory prior to re-allocation if needed... */
2689         if (dev->data->tx_queues[queue_idx] != NULL) {
2690                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2691                 dev->data->tx_queues[queue_idx] = NULL;
2692         }
2693
2694         /* First allocate the tx queue data structure */
2695         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2696                                  RTE_CACHE_LINE_SIZE, socket_id);
2697         if (txq == NULL)
2698                 return -ENOMEM;
2699
2700         /*
2701          * Allocate TX ring hardware descriptors. A memzone large enough to
2702          * handle the maximum ring size is allocated in order to allow for
2703          * resizing in later calls to the queue setup function.
2704          */
2705         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2706                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2707                         IXGBE_ALIGN, socket_id);
2708         if (tz == NULL) {
2709                 ixgbe_tx_queue_release(txq);
2710                 return -ENOMEM;
2711         }
2712
2713         txq->nb_tx_desc = nb_desc;
2714         txq->tx_rs_thresh = tx_rs_thresh;
2715         txq->tx_free_thresh = tx_free_thresh;
2716         txq->pthresh = tx_conf->tx_thresh.pthresh;
2717         txq->hthresh = tx_conf->tx_thresh.hthresh;
2718         txq->wthresh = tx_conf->tx_thresh.wthresh;
2719         txq->queue_id = queue_idx;
2720         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2721                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2722         txq->port_id = dev->data->port_id;
2723         txq->offloads = offloads;
2724         txq->ops = &def_txq_ops;
2725         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2726 #ifdef RTE_LIBRTE_SECURITY
2727         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2728                         DEV_TX_OFFLOAD_SECURITY);
2729 #endif
2730
2731         /*
2732          * Modification to set VFTDT for virtual function if vf is detected
2733          */
2734         if (hw->mac.type == ixgbe_mac_82599_vf ||
2735             hw->mac.type == ixgbe_mac_X540_vf ||
2736             hw->mac.type == ixgbe_mac_X550_vf ||
2737             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2738             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2739                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2740         else
2741                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2742
2743         txq->tx_ring_phys_addr = tz->iova;
2744         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2745
2746         /* Allocate software ring */
2747         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2748                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2749                                 RTE_CACHE_LINE_SIZE, socket_id);
2750         if (txq->sw_ring == NULL) {
2751                 ixgbe_tx_queue_release(txq);
2752                 return -ENOMEM;
2753         }
2754         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2755                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2756
2757         /* set up vector or scalar TX function as appropriate */
2758         ixgbe_set_tx_function(dev, txq);
2759
2760         txq->ops->reset(txq);
2761
2762         dev->data->tx_queues[queue_idx] = txq;
2763
2764
2765         return 0;
2766 }
2767
2768 /**
2769  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2770  *
2771  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2772  * in the sw_rsc_ring is not set to NULL but rather points to the next
2773  * mbuf of this RSC aggregation (that has not been completed yet and still
2774  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2775  * will just free first "nb_segs" segments of the cluster explicitly by calling
2776  * an rte_pktmbuf_free_seg().
2777  *
2778  * @m scattered cluster head
2779  */
2780 static void __rte_cold
2781 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2782 {
2783         uint16_t i, nb_segs = m->nb_segs;
2784         struct rte_mbuf *next_seg;
2785
2786         for (i = 0; i < nb_segs; i++) {
2787                 next_seg = m->next;
2788                 rte_pktmbuf_free_seg(m);
2789                 m = next_seg;
2790         }
2791 }
2792
2793 static void __rte_cold
2794 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2795 {
2796         unsigned i;
2797
2798         /* SSE Vector driver has a different way of releasing mbufs. */
2799         if (rxq->rx_using_sse) {
2800                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2801                 return;
2802         }
2803
2804         if (rxq->sw_ring != NULL) {
2805                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2806                         if (rxq->sw_ring[i].mbuf != NULL) {
2807                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2808                                 rxq->sw_ring[i].mbuf = NULL;
2809                         }
2810                 }
2811                 if (rxq->rx_nb_avail) {
2812                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2813                                 struct rte_mbuf *mb;
2814
2815                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2816                                 rte_pktmbuf_free_seg(mb);
2817                         }
2818                         rxq->rx_nb_avail = 0;
2819                 }
2820         }
2821
2822         if (rxq->sw_sc_ring)
2823                 for (i = 0; i < rxq->nb_rx_desc; i++)
2824                         if (rxq->sw_sc_ring[i].fbuf) {
2825                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2826                                 rxq->sw_sc_ring[i].fbuf = NULL;
2827                         }
2828 }
2829
2830 static void __rte_cold
2831 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2832 {
2833         if (rxq != NULL) {
2834                 ixgbe_rx_queue_release_mbufs(rxq);
2835                 rte_free(rxq->sw_ring);
2836                 rte_free(rxq->sw_sc_ring);
2837                 rte_free(rxq);
2838         }
2839 }
2840
2841 void __rte_cold
2842 ixgbe_dev_rx_queue_release(void *rxq)
2843 {
2844         ixgbe_rx_queue_release(rxq);
2845 }
2846
2847 /*
2848  * Check if Rx Burst Bulk Alloc function can be used.
2849  * Return
2850  *        0: the preconditions are satisfied and the bulk allocation function
2851  *           can be used.
2852  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2853  *           function must be used.
2854  */
2855 static inline int __rte_cold
2856 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2857 {
2858         int ret = 0;
2859
2860         /*
2861          * Make sure the following pre-conditions are satisfied:
2862          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2863          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2864          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2865          * Scattered packets are not supported.  This should be checked
2866          * outside of this function.
2867          */
2868         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2869                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2870                              "rxq->rx_free_thresh=%d, "
2871                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2872                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2873                 ret = -EINVAL;
2874         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2875                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2876                              "rxq->rx_free_thresh=%d, "
2877                              "rxq->nb_rx_desc=%d",
2878                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2879                 ret = -EINVAL;
2880         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2881                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2882                              "rxq->nb_rx_desc=%d, "
2883                              "rxq->rx_free_thresh=%d",
2884                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2885                 ret = -EINVAL;
2886         }
2887
2888         return ret;
2889 }
2890
2891 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2892 static void __rte_cold
2893 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2894 {
2895         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2896         unsigned i;
2897         uint16_t len = rxq->nb_rx_desc;
2898
2899         /*
2900          * By default, the Rx queue setup function allocates enough memory for
2901          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2902          * extra memory at the end of the descriptor ring to be zero'd out.
2903          */
2904         if (adapter->rx_bulk_alloc_allowed)
2905                 /* zero out extra memory */
2906                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2907
2908         /*
2909          * Zero out HW ring memory. Zero out extra memory at the end of
2910          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2911          * reads extra memory as zeros.
2912          */
2913         for (i = 0; i < len; i++) {
2914                 rxq->rx_ring[i] = zeroed_desc;
2915         }
2916
2917         /*
2918          * initialize extra software ring entries. Space for these extra
2919          * entries is always allocated
2920          */
2921         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2922         for (i = rxq->nb_rx_desc; i < len; ++i) {
2923                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2924         }
2925
2926         rxq->rx_nb_avail = 0;
2927         rxq->rx_next_avail = 0;
2928         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2929         rxq->rx_tail = 0;
2930         rxq->nb_rx_hold = 0;
2931         rxq->pkt_first_seg = NULL;
2932         rxq->pkt_last_seg = NULL;
2933
2934 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2935         rxq->rxrearm_start = 0;
2936         rxq->rxrearm_nb = 0;
2937 #endif
2938 }
2939
2940 static int
2941 ixgbe_is_vf(struct rte_eth_dev *dev)
2942 {
2943         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2944
2945         switch (hw->mac.type) {
2946         case ixgbe_mac_82599_vf:
2947         case ixgbe_mac_X540_vf:
2948         case ixgbe_mac_X550_vf:
2949         case ixgbe_mac_X550EM_x_vf:
2950         case ixgbe_mac_X550EM_a_vf:
2951                 return 1;
2952         default:
2953                 return 0;
2954         }
2955 }
2956
2957 uint64_t
2958 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2959 {
2960         uint64_t offloads = 0;
2961         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2962
2963         if (hw->mac.type != ixgbe_mac_82598EB)
2964                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2965
2966         return offloads;
2967 }
2968
2969 uint64_t
2970 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2971 {
2972         uint64_t offloads;
2973         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2974
2975         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2976                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2977                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2978                    DEV_RX_OFFLOAD_KEEP_CRC    |
2979                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2980                    DEV_RX_OFFLOAD_VLAN_FILTER |
2981                    DEV_RX_OFFLOAD_SCATTER |
2982                    DEV_RX_OFFLOAD_RSS_HASH;
2983
2984         if (hw->mac.type == ixgbe_mac_82598EB)
2985                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2986
2987         if (ixgbe_is_vf(dev) == 0)
2988                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2989
2990         /*
2991          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2992          * mode.
2993          */
2994         if ((hw->mac.type == ixgbe_mac_82599EB ||
2995              hw->mac.type == ixgbe_mac_X540 ||
2996              hw->mac.type == ixgbe_mac_X550) &&
2997             !RTE_ETH_DEV_SRIOV(dev).active)
2998                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2999
3000         if (hw->mac.type == ixgbe_mac_82599EB ||
3001             hw->mac.type == ixgbe_mac_X540)
3002                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3003
3004         if (hw->mac.type == ixgbe_mac_X550 ||
3005             hw->mac.type == ixgbe_mac_X550EM_x ||
3006             hw->mac.type == ixgbe_mac_X550EM_a)
3007                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3008
3009 #ifdef RTE_LIBRTE_SECURITY
3010         if (dev->security_ctx)
3011                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3012 #endif
3013
3014         return offloads;
3015 }
3016
3017 int __rte_cold
3018 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3019                          uint16_t queue_idx,
3020                          uint16_t nb_desc,
3021                          unsigned int socket_id,
3022                          const struct rte_eth_rxconf *rx_conf,
3023                          struct rte_mempool *mp)
3024 {
3025         const struct rte_memzone *rz;
3026         struct ixgbe_rx_queue *rxq;
3027         struct ixgbe_hw     *hw;
3028         uint16_t len;
3029         struct ixgbe_adapter *adapter = dev->data->dev_private;
3030         uint64_t offloads;
3031
3032         PMD_INIT_FUNC_TRACE();
3033         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3034
3035         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3036
3037         /*
3038          * Validate number of receive descriptors.
3039          * It must not exceed hardware maximum, and must be multiple
3040          * of IXGBE_ALIGN.
3041          */
3042         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3043                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3044                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3045                 return -EINVAL;
3046         }
3047
3048         /* Free memory prior to re-allocation if needed... */
3049         if (dev->data->rx_queues[queue_idx] != NULL) {
3050                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3051                 dev->data->rx_queues[queue_idx] = NULL;
3052         }
3053
3054         /* First allocate the rx queue data structure */
3055         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3056                                  RTE_CACHE_LINE_SIZE, socket_id);
3057         if (rxq == NULL)
3058                 return -ENOMEM;
3059         rxq->mb_pool = mp;
3060         rxq->nb_rx_desc = nb_desc;
3061         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3062         rxq->queue_id = queue_idx;
3063         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3064                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3065         rxq->port_id = dev->data->port_id;
3066         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3067                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3068         else
3069                 rxq->crc_len = 0;
3070         rxq->drop_en = rx_conf->rx_drop_en;
3071         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3072         rxq->offloads = offloads;
3073
3074         /*
3075          * The packet type in RX descriptor is different for different NICs.
3076          * Some bits are used for x550 but reserved for other NICS.
3077          * So set different masks for different NICs.
3078          */
3079         if (hw->mac.type == ixgbe_mac_X550 ||
3080             hw->mac.type == ixgbe_mac_X550EM_x ||
3081             hw->mac.type == ixgbe_mac_X550EM_a ||
3082             hw->mac.type == ixgbe_mac_X550_vf ||
3083             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3084             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3085                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3086         else
3087                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3088
3089         /*
3090          * Allocate RX ring hardware descriptors. A memzone large enough to
3091          * handle the maximum ring size is allocated in order to allow for
3092          * resizing in later calls to the queue setup function.
3093          */
3094         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3095                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3096         if (rz == NULL) {
3097                 ixgbe_rx_queue_release(rxq);
3098                 return -ENOMEM;
3099         }
3100
3101         /*
3102          * Zero init all the descriptors in the ring.
3103          */
3104         memset(rz->addr, 0, RX_RING_SZ);
3105
3106         /*
3107          * Modified to setup VFRDT for Virtual Function
3108          */
3109         if (hw->mac.type == ixgbe_mac_82599_vf ||
3110             hw->mac.type == ixgbe_mac_X540_vf ||
3111             hw->mac.type == ixgbe_mac_X550_vf ||
3112             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3113             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3114                 rxq->rdt_reg_addr =
3115                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3116                 rxq->rdh_reg_addr =
3117                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3118         } else {
3119                 rxq->rdt_reg_addr =
3120                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3121                 rxq->rdh_reg_addr =
3122                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3123         }
3124
3125         rxq->rx_ring_phys_addr = rz->iova;
3126         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3127
3128         /*
3129          * Certain constraints must be met in order to use the bulk buffer
3130          * allocation Rx burst function. If any of Rx queues doesn't meet them
3131          * the feature should be disabled for the whole port.
3132          */
3133         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3134                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3135                                     "preconditions - canceling the feature for "
3136                                     "the whole port[%d]",
3137                              rxq->queue_id, rxq->port_id);
3138                 adapter->rx_bulk_alloc_allowed = false;
3139         }
3140
3141         /*
3142          * Allocate software ring. Allow for space at the end of the
3143          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3144          * function does not access an invalid memory region.
3145          */
3146         len = nb_desc;
3147         if (adapter->rx_bulk_alloc_allowed)
3148                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3149
3150         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3151                                           sizeof(struct ixgbe_rx_entry) * len,
3152                                           RTE_CACHE_LINE_SIZE, socket_id);
3153         if (!rxq->sw_ring) {
3154                 ixgbe_rx_queue_release(rxq);
3155                 return -ENOMEM;
3156         }
3157
3158         /*
3159          * Always allocate even if it's not going to be needed in order to
3160          * simplify the code.
3161          *
3162          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3163          * be requested in ixgbe_dev_rx_init(), which is called later from
3164          * dev_start() flow.
3165          */
3166         rxq->sw_sc_ring =
3167                 rte_zmalloc_socket("rxq->sw_sc_ring",
3168                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3169                                    RTE_CACHE_LINE_SIZE, socket_id);
3170         if (!rxq->sw_sc_ring) {
3171                 ixgbe_rx_queue_release(rxq);
3172                 return -ENOMEM;
3173         }
3174
3175         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3176                             "dma_addr=0x%"PRIx64,
3177                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3178                      rxq->rx_ring_phys_addr);
3179
3180         if (!rte_is_power_of_2(nb_desc)) {
3181                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3182                                     "preconditions - canceling the feature for "
3183                                     "the whole port[%d]",
3184                              rxq->queue_id, rxq->port_id);
3185                 adapter->rx_vec_allowed = false;
3186         } else
3187                 ixgbe_rxq_vec_setup(rxq);
3188
3189         dev->data->rx_queues[queue_idx] = rxq;
3190
3191         ixgbe_reset_rx_queue(adapter, rxq);
3192
3193         return 0;
3194 }
3195
3196 uint32_t
3197 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3198 {
3199 #define IXGBE_RXQ_SCAN_INTERVAL 4
3200         volatile union ixgbe_adv_rx_desc *rxdp;
3201         struct ixgbe_rx_queue *rxq;
3202         uint32_t desc = 0;
3203
3204         rxq = dev->data->rx_queues[rx_queue_id];
3205         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3206
3207         while ((desc < rxq->nb_rx_desc) &&
3208                 (rxdp->wb.upper.status_error &
3209                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3210                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3211                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3212                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3213                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3214                                 desc - rxq->nb_rx_desc]);
3215         }
3216
3217         return desc;
3218 }
3219
3220 int
3221 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3222 {
3223         volatile union ixgbe_adv_rx_desc *rxdp;
3224         struct ixgbe_rx_queue *rxq = rx_queue;
3225         uint32_t desc;
3226
3227         if (unlikely(offset >= rxq->nb_rx_desc))
3228                 return 0;
3229         desc = rxq->rx_tail + offset;
3230         if (desc >= rxq->nb_rx_desc)
3231                 desc -= rxq->nb_rx_desc;
3232
3233         rxdp = &rxq->rx_ring[desc];
3234         return !!(rxdp->wb.upper.status_error &
3235                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3236 }
3237
3238 int
3239 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3240 {
3241         struct ixgbe_rx_queue *rxq = rx_queue;
3242         volatile uint32_t *status;
3243         uint32_t nb_hold, desc;
3244
3245         if (unlikely(offset >= rxq->nb_rx_desc))
3246                 return -EINVAL;
3247
3248 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3249         if (rxq->rx_using_sse)
3250                 nb_hold = rxq->rxrearm_nb;
3251         else
3252 #endif
3253                 nb_hold = rxq->nb_rx_hold;
3254         if (offset >= rxq->nb_rx_desc - nb_hold)
3255                 return RTE_ETH_RX_DESC_UNAVAIL;
3256
3257         desc = rxq->rx_tail + offset;
3258         if (desc >= rxq->nb_rx_desc)
3259                 desc -= rxq->nb_rx_desc;
3260
3261         status = &rxq->rx_ring[desc].wb.upper.status_error;
3262         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3263                 return RTE_ETH_RX_DESC_DONE;
3264
3265         return RTE_ETH_RX_DESC_AVAIL;
3266 }
3267
3268 int
3269 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3270 {
3271         struct ixgbe_tx_queue *txq = tx_queue;
3272         volatile uint32_t *status;
3273         uint32_t desc;
3274
3275         if (unlikely(offset >= txq->nb_tx_desc))
3276                 return -EINVAL;
3277
3278         desc = txq->tx_tail + offset;
3279         /* go to next desc that has the RS bit */
3280         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3281                 txq->tx_rs_thresh;
3282         if (desc >= txq->nb_tx_desc) {
3283                 desc -= txq->nb_tx_desc;
3284                 if (desc >= txq->nb_tx_desc)
3285                         desc -= txq->nb_tx_desc;
3286         }
3287
3288         status = &txq->tx_ring[desc].wb.status;
3289         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3290                 return RTE_ETH_TX_DESC_DONE;
3291
3292         return RTE_ETH_TX_DESC_FULL;
3293 }
3294
3295 /*
3296  * Set up link loopback for X540/X550 mode Tx->Rx.
3297  */
3298 static inline void __rte_cold
3299 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3300 {
3301         uint32_t macc;
3302         PMD_INIT_FUNC_TRACE();
3303
3304         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3305
3306         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3307                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3308         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3309
3310         if (enable) {
3311                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3312                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3313                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3314                 macc |= IXGBE_MACC_FLU;
3315         } else {
3316                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3317                 macc &= ~IXGBE_MACC_FLU;
3318         }
3319
3320         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3321                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3322
3323         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3324 }
3325
3326 void __rte_cold
3327 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3328 {
3329         unsigned i;
3330         struct ixgbe_adapter *adapter = dev->data->dev_private;
3331         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3332
3333         PMD_INIT_FUNC_TRACE();
3334
3335         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3336                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3337
3338                 if (txq != NULL) {
3339                         txq->ops->release_mbufs(txq);
3340                         txq->ops->reset(txq);
3341                 }
3342         }
3343
3344         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3345                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3346
3347                 if (rxq != NULL) {
3348                         ixgbe_rx_queue_release_mbufs(rxq);
3349                         ixgbe_reset_rx_queue(adapter, rxq);
3350                 }
3351         }
3352         /* If loopback mode was enabled, reconfigure the link accordingly */
3353         if (dev->data->dev_conf.lpbk_mode != 0) {
3354                 if (hw->mac.type == ixgbe_mac_X540 ||
3355                      hw->mac.type == ixgbe_mac_X550 ||
3356                      hw->mac.type == ixgbe_mac_X550EM_x ||
3357                      hw->mac.type == ixgbe_mac_X550EM_a)
3358                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3359         }
3360 }
3361
3362 void
3363 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3364 {
3365         unsigned i;
3366
3367         PMD_INIT_FUNC_TRACE();
3368
3369         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3370                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3371                 dev->data->rx_queues[i] = NULL;
3372                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3373         }
3374         dev->data->nb_rx_queues = 0;
3375
3376         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3377                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3378                 dev->data->tx_queues[i] = NULL;
3379                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3380         }
3381         dev->data->nb_tx_queues = 0;
3382 }
3383
3384 /*********************************************************************
3385  *
3386  *  Device RX/TX init functions
3387  *
3388  **********************************************************************/
3389
3390 /**
3391  * Receive Side Scaling (RSS)
3392  * See section 7.1.2.8 in the following document:
3393  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3394  *
3395  * Principles:
3396  * The source and destination IP addresses of the IP header and the source
3397  * and destination ports of TCP/UDP headers, if any, of received packets are
3398  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3399  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3400  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3401  * RSS output index which is used as the RX queue index where to store the
3402  * received packets.
3403  * The following output is supplied in the RX write-back descriptor:
3404  *     - 32-bit result of the Microsoft RSS hash function,
3405  *     - 4-bit RSS type field.
3406  */
3407
3408 /*
3409  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3410  * Used as the default key.
3411  */
3412 static uint8_t rss_intel_key[40] = {
3413         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3414         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3415         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3416         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3417         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3418 };
3419
3420 static void
3421 ixgbe_rss_disable(struct rte_eth_dev *dev)
3422 {
3423         struct ixgbe_hw *hw;
3424         uint32_t mrqc;
3425         uint32_t mrqc_reg;
3426
3427         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3428         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3429         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3430         mrqc &= ~IXGBE_MRQC_RSSEN;
3431         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3432 }
3433
3434 static void
3435 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3436 {
3437         uint8_t  *hash_key;
3438         uint32_t mrqc;
3439         uint32_t rss_key;
3440         uint64_t rss_hf;
3441         uint16_t i;
3442         uint32_t mrqc_reg;
3443         uint32_t rssrk_reg;
3444
3445         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3446         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3447
3448         hash_key = rss_conf->rss_key;
3449         if (hash_key != NULL) {
3450                 /* Fill in RSS hash key */
3451                 for (i = 0; i < 10; i++) {
3452                         rss_key  = hash_key[(i * 4)];
3453                         rss_key |= hash_key[(i * 4) + 1] << 8;
3454                         rss_key |= hash_key[(i * 4) + 2] << 16;
3455                         rss_key |= hash_key[(i * 4) + 3] << 24;
3456                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3457                 }
3458         }
3459
3460         /* Set configured hashing protocols in MRQC register */
3461         rss_hf = rss_conf->rss_hf;
3462         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3463         if (rss_hf & ETH_RSS_IPV4)
3464                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3465         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3466                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3467         if (rss_hf & ETH_RSS_IPV6)
3468                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3469         if (rss_hf & ETH_RSS_IPV6_EX)
3470                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3471         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3472                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3473         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3474                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3475         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3476                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3477         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3478                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3479         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3480                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3481         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3482 }
3483
3484 int
3485 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3486                           struct rte_eth_rss_conf *rss_conf)
3487 {
3488         struct ixgbe_hw *hw;
3489         uint32_t mrqc;
3490         uint64_t rss_hf;
3491         uint32_t mrqc_reg;
3492
3493         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3494
3495         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3496                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3497                         "NIC.");
3498                 return -ENOTSUP;
3499         }
3500         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3501
3502         /*
3503          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3504          *     "RSS enabling cannot be done dynamically while it must be
3505          *      preceded by a software reset"
3506          * Before changing anything, first check that the update RSS operation
3507          * does not attempt to disable RSS, if RSS was enabled at
3508          * initialization time, or does not attempt to enable RSS, if RSS was
3509          * disabled at initialization time.
3510          */
3511         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3512         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3513         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3514                 if (rss_hf != 0) /* Enable RSS */
3515                         return -(EINVAL);
3516                 return 0; /* Nothing to do */
3517         }
3518         /* RSS enabled */
3519         if (rss_hf == 0) /* Disable RSS */
3520                 return -(EINVAL);
3521         ixgbe_hw_rss_hash_set(hw, rss_conf);
3522         return 0;
3523 }
3524
3525 int
3526 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3527                             struct rte_eth_rss_conf *rss_conf)
3528 {
3529         struct ixgbe_hw *hw;
3530         uint8_t *hash_key;
3531         uint32_t mrqc;
3532         uint32_t rss_key;
3533         uint64_t rss_hf;
3534         uint16_t i;
3535         uint32_t mrqc_reg;
3536         uint32_t rssrk_reg;
3537
3538         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3539         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3540         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3541         hash_key = rss_conf->rss_key;
3542         if (hash_key != NULL) {
3543                 /* Return RSS hash key */
3544                 for (i = 0; i < 10; i++) {
3545                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3546                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3547                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3548                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3549                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3550                 }
3551         }
3552
3553         /* Get RSS functions configured in MRQC register */
3554         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3555         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3556                 rss_conf->rss_hf = 0;
3557                 return 0;
3558         }
3559         rss_hf = 0;
3560         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3561                 rss_hf |= ETH_RSS_IPV4;
3562         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3563                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3564         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3565                 rss_hf |= ETH_RSS_IPV6;
3566         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3567                 rss_hf |= ETH_RSS_IPV6_EX;
3568         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3569                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3570         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3571                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3572         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3573                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3574         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3575                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3576         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3577                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3578         rss_conf->rss_hf = rss_hf;
3579         return 0;
3580 }
3581
3582 static void
3583 ixgbe_rss_configure(struct rte_eth_dev *dev)
3584 {
3585         struct rte_eth_rss_conf rss_conf;
3586         struct ixgbe_adapter *adapter;
3587         struct ixgbe_hw *hw;
3588         uint32_t reta;
3589         uint16_t i;
3590         uint16_t j;
3591         uint16_t sp_reta_size;
3592         uint32_t reta_reg;
3593
3594         PMD_INIT_FUNC_TRACE();
3595         adapter = dev->data->dev_private;
3596         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3597
3598         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3599
3600         /*
3601          * Fill in redirection table
3602          * The byte-swap is needed because NIC registers are in
3603          * little-endian order.
3604          */
3605         if (adapter->rss_reta_updated == 0) {
3606                 reta = 0;
3607                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3608                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3609
3610                         if (j == dev->data->nb_rx_queues)
3611                                 j = 0;
3612                         reta = (reta << 8) | j;
3613                         if ((i & 3) == 3)
3614                                 IXGBE_WRITE_REG(hw, reta_reg,
3615                                                 rte_bswap32(reta));
3616                 }
3617         }
3618
3619         /*
3620          * Configure the RSS key and the RSS protocols used to compute
3621          * the RSS hash of input packets.
3622          */
3623         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3624         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3625                 ixgbe_rss_disable(dev);
3626                 return;
3627         }
3628         if (rss_conf.rss_key == NULL)
3629                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3630         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3631 }
3632
3633 #define NUM_VFTA_REGISTERS 128
3634 #define NIC_RX_BUFFER_SIZE 0x200
3635 #define X550_RX_BUFFER_SIZE 0x180
3636
3637 static void
3638 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3639 {
3640         struct rte_eth_vmdq_dcb_conf *cfg;
3641         struct ixgbe_hw *hw;
3642         enum rte_eth_nb_pools num_pools;
3643         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3644         uint16_t pbsize;
3645         uint8_t nb_tcs; /* number of traffic classes */
3646         int i;
3647
3648         PMD_INIT_FUNC_TRACE();
3649         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3650         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3651         num_pools = cfg->nb_queue_pools;
3652         /* Check we have a valid number of pools */
3653         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3654                 ixgbe_rss_disable(dev);
3655                 return;
3656         }
3657         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3658         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3659
3660         /*
3661          * RXPBSIZE
3662          * split rx buffer up into sections, each for 1 traffic class
3663          */
3664         switch (hw->mac.type) {
3665         case ixgbe_mac_X550:
3666         case ixgbe_mac_X550EM_x:
3667         case ixgbe_mac_X550EM_a:
3668                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3669                 break;
3670         default:
3671                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3672                 break;
3673         }
3674         for (i = 0; i < nb_tcs; i++) {
3675                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3676
3677                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3678                 /* clear 10 bits. */
3679                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3680                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3681         }
3682         /* zero alloc all unused TCs */
3683         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3684                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3685
3686                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3687                 /* clear 10 bits. */
3688                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3689         }
3690
3691         /* MRQC: enable vmdq and dcb */
3692         mrqc = (num_pools == ETH_16_POOLS) ?
3693                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3694         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3695
3696         /* PFVTCTL: turn on virtualisation and set the default pool */
3697         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3698         if (cfg->enable_default_pool) {
3699                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3700         } else {
3701                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3702         }
3703
3704         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3705
3706         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3707         queue_mapping = 0;
3708         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3709                 /*
3710                  * mapping is done with 3 bits per priority,
3711                  * so shift by i*3 each time
3712                  */
3713                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3714
3715         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3716
3717         /* RTRPCS: DCB related */
3718         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3719
3720         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3721         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3722         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3723         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3724
3725         /* VFTA - enable all vlan filters */
3726         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3727                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3728         }
3729
3730         /* VFRE: pool enabling for receive - 16 or 32 */
3731         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3732                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3733
3734         /*
3735          * MPSAR - allow pools to read specific mac addresses
3736          * In this case, all pools should be able to read from mac addr 0
3737          */
3738         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3739         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3740
3741         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3742         for (i = 0; i < cfg->nb_pool_maps; i++) {
3743                 /* set vlan id in VF register and set the valid bit */
3744                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3745                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3746                 /*
3747                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3748                  * pools, we only need to use the first half of the register
3749                  * i.e. bits 0-31
3750                  */
3751                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3752         }
3753 }
3754
3755 /**
3756  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3757  * @dev: pointer to eth_dev structure
3758  * @dcb_config: pointer to ixgbe_dcb_config structure
3759  */
3760 static void
3761 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3762                        struct ixgbe_dcb_config *dcb_config)
3763 {
3764         uint32_t reg;
3765         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3766
3767         PMD_INIT_FUNC_TRACE();
3768         if (hw->mac.type != ixgbe_mac_82598EB) {
3769                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3770                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3771                 reg |= IXGBE_RTTDCS_ARBDIS;
3772                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3773
3774                 /* Enable DCB for Tx with 8 TCs */
3775                 if (dcb_config->num_tcs.pg_tcs == 8) {
3776                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3777                 } else {
3778                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3779                 }
3780                 if (dcb_config->vt_mode)
3781                         reg |= IXGBE_MTQC_VT_ENA;
3782                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3783
3784                 /* Enable the Tx desc arbiter */
3785                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3786                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3787                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3788
3789                 /* Enable Security TX Buffer IFG for DCB */
3790                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3791                 reg |= IXGBE_SECTX_DCB;
3792                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3793         }
3794 }
3795
3796 /**
3797  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3798  * @dev: pointer to rte_eth_dev structure
3799  * @dcb_config: pointer to ixgbe_dcb_config structure
3800  */
3801 static void
3802 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3803                         struct ixgbe_dcb_config *dcb_config)
3804 {
3805         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3806                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3807         struct ixgbe_hw *hw =
3808                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3809
3810         PMD_INIT_FUNC_TRACE();
3811         if (hw->mac.type != ixgbe_mac_82598EB)
3812                 /*PF VF Transmit Enable*/
3813                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3814                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3815
3816         /*Configure general DCB TX parameters*/
3817         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3818 }
3819
3820 static void
3821 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3822                         struct ixgbe_dcb_config *dcb_config)
3823 {
3824         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3825                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3826         struct ixgbe_dcb_tc_config *tc;
3827         uint8_t i, j;
3828
3829         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3830         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3831                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3832                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3833         } else {
3834                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3835                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3836         }
3837
3838         /* Initialize User Priority to Traffic Class mapping */
3839         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3840                 tc = &dcb_config->tc_config[j];
3841                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3842         }
3843
3844         /* User Priority to Traffic Class mapping */
3845         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3846                 j = vmdq_rx_conf->dcb_tc[i];
3847                 tc = &dcb_config->tc_config[j];
3848                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3849                                                 (uint8_t)(1 << i);
3850         }
3851 }
3852
3853 static void
3854 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3855                         struct ixgbe_dcb_config *dcb_config)
3856 {
3857         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3858                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3859         struct ixgbe_dcb_tc_config *tc;
3860         uint8_t i, j;
3861
3862         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3863         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3864                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3865                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3866         } else {
3867                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3868                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3869         }
3870
3871         /* Initialize User Priority to Traffic Class mapping */
3872         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3873                 tc = &dcb_config->tc_config[j];
3874                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3875         }
3876
3877         /* User Priority to Traffic Class mapping */
3878         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3879                 j = vmdq_tx_conf->dcb_tc[i];
3880                 tc = &dcb_config->tc_config[j];
3881                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3882                                                 (uint8_t)(1 << i);
3883         }
3884 }
3885
3886 static void
3887 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3888                 struct ixgbe_dcb_config *dcb_config)
3889 {
3890         struct rte_eth_dcb_rx_conf *rx_conf =
3891                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3892         struct ixgbe_dcb_tc_config *tc;
3893         uint8_t i, j;
3894
3895         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3896         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3897
3898         /* Initialize User Priority to Traffic Class mapping */
3899         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3900                 tc = &dcb_config->tc_config[j];
3901                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3902         }
3903
3904         /* User Priority to Traffic Class mapping */
3905         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3906                 j = rx_conf->dcb_tc[i];
3907                 tc = &dcb_config->tc_config[j];
3908                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3909                                                 (uint8_t)(1 << i);
3910         }
3911 }
3912
3913 static void
3914 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3915                 struct ixgbe_dcb_config *dcb_config)
3916 {
3917         struct rte_eth_dcb_tx_conf *tx_conf =
3918                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3919         struct ixgbe_dcb_tc_config *tc;
3920         uint8_t i, j;
3921
3922         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3923         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3924
3925         /* Initialize User Priority to Traffic Class mapping */
3926         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3927                 tc = &dcb_config->tc_config[j];
3928                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3929         }
3930
3931         /* User Priority to Traffic Class mapping */
3932         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3933                 j = tx_conf->dcb_tc[i];
3934                 tc = &dcb_config->tc_config[j];
3935                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3936                                                 (uint8_t)(1 << i);
3937         }
3938 }
3939
3940 /**
3941  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3942  * @dev: pointer to eth_dev structure
3943  * @dcb_config: pointer to ixgbe_dcb_config structure
3944  */
3945 static void
3946 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3947                        struct ixgbe_dcb_config *dcb_config)
3948 {
3949         uint32_t reg;
3950         uint32_t vlanctrl;
3951         uint8_t i;
3952         uint32_t q;
3953         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3954
3955         PMD_INIT_FUNC_TRACE();
3956         /*
3957          * Disable the arbiter before changing parameters
3958          * (always enable recycle mode; WSP)
3959          */
3960         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3961         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3962
3963         if (hw->mac.type != ixgbe_mac_82598EB) {
3964                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3965                 if (dcb_config->num_tcs.pg_tcs == 4) {
3966                         if (dcb_config->vt_mode)
3967                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3968                                         IXGBE_MRQC_VMDQRT4TCEN;
3969                         else {
3970                                 /* no matter the mode is DCB or DCB_RSS, just
3971                                  * set the MRQE to RSSXTCEN. RSS is controlled
3972                                  * by RSS_FIELD
3973                                  */
3974                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3975                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3976                                         IXGBE_MRQC_RTRSS4TCEN;
3977                         }
3978                 }
3979                 if (dcb_config->num_tcs.pg_tcs == 8) {
3980                         if (dcb_config->vt_mode)
3981                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3982                                         IXGBE_MRQC_VMDQRT8TCEN;
3983                         else {
3984                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3985                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3986                                         IXGBE_MRQC_RTRSS8TCEN;
3987                         }
3988                 }
3989
3990                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3991
3992                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3993                         /* Disable drop for all queues in VMDQ mode*/
3994                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3995                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3996                                                 (IXGBE_QDE_WRITE |
3997                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3998                 } else {
3999                         /* Enable drop for all queues in SRIOV mode */
4000                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4001                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4002                                                 (IXGBE_QDE_WRITE |
4003                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4004                                                  IXGBE_QDE_ENABLE));
4005                 }
4006         }
4007
4008         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4009         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4010         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4011         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4012
4013         /* VFTA - enable all vlan filters */
4014         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4015                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4016         }
4017
4018         /*
4019          * Configure Rx packet plane (recycle mode; WSP) and
4020          * enable arbiter
4021          */
4022         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4023         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4024 }
4025
4026 static void
4027 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4028                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4029 {
4030         switch (hw->mac.type) {
4031         case ixgbe_mac_82598EB:
4032                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4033                 break;
4034         case ixgbe_mac_82599EB:
4035         case ixgbe_mac_X540:
4036         case ixgbe_mac_X550:
4037         case ixgbe_mac_X550EM_x:
4038         case ixgbe_mac_X550EM_a:
4039                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4040                                                   tsa, map);
4041                 break;
4042         default:
4043                 break;
4044         }
4045 }
4046
4047 static void
4048 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4049                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4050 {
4051         switch (hw->mac.type) {
4052         case ixgbe_mac_82598EB:
4053                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4054                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4055                 break;
4056         case ixgbe_mac_82599EB:
4057         case ixgbe_mac_X540:
4058         case ixgbe_mac_X550:
4059         case ixgbe_mac_X550EM_x:
4060         case ixgbe_mac_X550EM_a:
4061                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4062                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4063                 break;
4064         default:
4065                 break;
4066         }
4067 }
4068
4069 #define DCB_RX_CONFIG  1
4070 #define DCB_TX_CONFIG  1
4071 #define DCB_TX_PB      1024
4072 /**
4073  * ixgbe_dcb_hw_configure - Enable DCB and configure
4074  * general DCB in VT mode and non-VT mode parameters
4075  * @dev: pointer to rte_eth_dev structure
4076  * @dcb_config: pointer to ixgbe_dcb_config structure
4077  */
4078 static int
4079 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4080                         struct ixgbe_dcb_config *dcb_config)
4081 {
4082         int     ret = 0;
4083         uint8_t i, pfc_en, nb_tcs;
4084         uint16_t pbsize, rx_buffer_size;
4085         uint8_t config_dcb_rx = 0;
4086         uint8_t config_dcb_tx = 0;
4087         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4088         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4089         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4090         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4091         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4092         struct ixgbe_dcb_tc_config *tc;
4093         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4094                 RTE_ETHER_CRC_LEN;
4095         struct ixgbe_hw *hw =
4096                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4097         struct ixgbe_bw_conf *bw_conf =
4098                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4099
4100         switch (dev->data->dev_conf.rxmode.mq_mode) {
4101         case ETH_MQ_RX_VMDQ_DCB:
4102                 dcb_config->vt_mode = true;
4103                 if (hw->mac.type != ixgbe_mac_82598EB) {
4104                         config_dcb_rx = DCB_RX_CONFIG;
4105                         /*
4106                          *get dcb and VT rx configuration parameters
4107                          *from rte_eth_conf
4108                          */
4109                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4110                         /*Configure general VMDQ and DCB RX parameters*/
4111                         ixgbe_vmdq_dcb_configure(dev);
4112                 }
4113                 break;
4114         case ETH_MQ_RX_DCB:
4115         case ETH_MQ_RX_DCB_RSS:
4116                 dcb_config->vt_mode = false;
4117                 config_dcb_rx = DCB_RX_CONFIG;
4118                 /* Get dcb TX configuration parameters from rte_eth_conf */
4119                 ixgbe_dcb_rx_config(dev, dcb_config);
4120                 /*Configure general DCB RX parameters*/
4121                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4122                 break;
4123         default:
4124                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4125                 break;
4126         }
4127         switch (dev->data->dev_conf.txmode.mq_mode) {
4128         case ETH_MQ_TX_VMDQ_DCB:
4129                 dcb_config->vt_mode = true;
4130                 config_dcb_tx = DCB_TX_CONFIG;
4131                 /* get DCB and VT TX configuration parameters
4132                  * from rte_eth_conf
4133                  */
4134                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4135                 /*Configure general VMDQ and DCB TX parameters*/
4136                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4137                 break;
4138
4139         case ETH_MQ_TX_DCB:
4140                 dcb_config->vt_mode = false;
4141                 config_dcb_tx = DCB_TX_CONFIG;
4142                 /*get DCB TX configuration parameters from rte_eth_conf*/
4143                 ixgbe_dcb_tx_config(dev, dcb_config);
4144                 /*Configure general DCB TX parameters*/
4145                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4146                 break;
4147         default:
4148                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4149                 break;
4150         }
4151
4152         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4153         /* Unpack map */
4154         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4155         if (nb_tcs == ETH_4_TCS) {
4156                 /* Avoid un-configured priority mapping to TC0 */
4157                 uint8_t j = 4;
4158                 uint8_t mask = 0xFF;
4159
4160                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4161                         mask = (uint8_t)(mask & (~(1 << map[i])));
4162                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4163                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4164                                 map[j++] = i;
4165                         mask >>= 1;
4166                 }
4167                 /* Re-configure 4 TCs BW */
4168                 for (i = 0; i < nb_tcs; i++) {
4169                         tc = &dcb_config->tc_config[i];
4170                         if (bw_conf->tc_num != nb_tcs)
4171                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4172                                         (uint8_t)(100 / nb_tcs);
4173                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4174                                                 (uint8_t)(100 / nb_tcs);
4175                 }
4176                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4177                         tc = &dcb_config->tc_config[i];
4178                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4179                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4180                 }
4181         } else {
4182                 /* Re-configure 8 TCs BW */
4183                 for (i = 0; i < nb_tcs; i++) {
4184                         tc = &dcb_config->tc_config[i];
4185                         if (bw_conf->tc_num != nb_tcs)
4186                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4187                                         (uint8_t)(100 / nb_tcs + (i & 1));
4188                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4189                                 (uint8_t)(100 / nb_tcs + (i & 1));
4190                 }
4191         }
4192
4193         switch (hw->mac.type) {
4194         case ixgbe_mac_X550:
4195         case ixgbe_mac_X550EM_x:
4196         case ixgbe_mac_X550EM_a:
4197                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4198                 break;
4199         default:
4200                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4201                 break;
4202         }
4203
4204         if (config_dcb_rx) {
4205                 /* Set RX buffer size */
4206                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4207                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4208
4209                 for (i = 0; i < nb_tcs; i++) {
4210                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4211                 }
4212                 /* zero alloc all unused TCs */
4213                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4214                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4215                 }
4216         }
4217         if (config_dcb_tx) {
4218                 /* Only support an equally distributed
4219                  *  Tx packet buffer strategy.
4220                  */
4221                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4222                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4223
4224                 for (i = 0; i < nb_tcs; i++) {
4225                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4226                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4227                 }
4228                 /* Clear unused TCs, if any, to zero buffer size*/
4229                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4230                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4231                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4232                 }
4233         }
4234
4235         /*Calculates traffic class credits*/
4236         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4237                                 IXGBE_DCB_TX_CONFIG);
4238         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4239                                 IXGBE_DCB_RX_CONFIG);
4240
4241         if (config_dcb_rx) {
4242                 /* Unpack CEE standard containers */
4243                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4244                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4245                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4246                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4247                 /* Configure PG(ETS) RX */
4248                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4249         }
4250
4251         if (config_dcb_tx) {
4252                 /* Unpack CEE standard containers */
4253                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4254                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4255                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4256                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4257                 /* Configure PG(ETS) TX */
4258                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4259         }
4260
4261         /*Configure queue statistics registers*/
4262         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4263
4264         /* Check if the PFC is supported */
4265         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4266                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4267                 for (i = 0; i < nb_tcs; i++) {
4268                         /*
4269                         * If the TC count is 8,and the default high_water is 48,
4270                         * the low_water is 16 as default.
4271                         */
4272                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4273                         hw->fc.low_water[i] = pbsize / 4;
4274                         /* Enable pfc for this TC */
4275                         tc = &dcb_config->tc_config[i];
4276                         tc->pfc = ixgbe_dcb_pfc_enabled;
4277                 }
4278                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4279                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4280                         pfc_en &= 0x0F;
4281                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4282         }
4283
4284         return ret;
4285 }
4286
4287 /**
4288  * ixgbe_configure_dcb - Configure DCB  Hardware
4289  * @dev: pointer to rte_eth_dev
4290  */
4291 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4292 {
4293         struct ixgbe_dcb_config *dcb_cfg =
4294                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4295         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4296
4297         PMD_INIT_FUNC_TRACE();
4298
4299         /* check support mq_mode for DCB */
4300         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4301             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4302             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4303                 return;
4304
4305         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4306                 return;
4307
4308         /** Configure DCB hardware **/
4309         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4310 }
4311
4312 /*
4313  * VMDq only support for 10 GbE NIC.
4314  */
4315 static void
4316 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4317 {
4318         struct rte_eth_vmdq_rx_conf *cfg;
4319         struct ixgbe_hw *hw;
4320         enum rte_eth_nb_pools num_pools;
4321         uint32_t mrqc, vt_ctl, vlanctrl;
4322         uint32_t vmolr = 0;
4323         int i;
4324
4325         PMD_INIT_FUNC_TRACE();
4326         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4327         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4328         num_pools = cfg->nb_queue_pools;
4329
4330         ixgbe_rss_disable(dev);
4331
4332         /* MRQC: enable vmdq */
4333         mrqc = IXGBE_MRQC_VMDQEN;
4334         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4335
4336         /* PFVTCTL: turn on virtualisation and set the default pool */
4337         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4338         if (cfg->enable_default_pool)
4339                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4340         else
4341                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4342
4343         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4344
4345         for (i = 0; i < (int)num_pools; i++) {
4346                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4347                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4348         }
4349
4350         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4351         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4352         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4353         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4354
4355         /* VFTA - enable all vlan filters */
4356         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4357                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4358
4359         /* VFRE: pool enabling for receive - 64 */
4360         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4361         if (num_pools == ETH_64_POOLS)
4362                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4363
4364         /*
4365          * MPSAR - allow pools to read specific mac addresses
4366          * In this case, all pools should be able to read from mac addr 0
4367          */
4368         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4369         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4370
4371         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4372         for (i = 0; i < cfg->nb_pool_maps; i++) {
4373                 /* set vlan id in VF register and set the valid bit */
4374                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4375                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4376                 /*
4377                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4378                  * pools, we only need to use the first half of the register
4379                  * i.e. bits 0-31
4380                  */
4381                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4382                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4383                                         (cfg->pool_map[i].pools & UINT32_MAX));
4384                 else
4385                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4386                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4387
4388         }
4389
4390         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4391         if (cfg->enable_loop_back) {
4392                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4393                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4394                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4395         }
4396
4397         IXGBE_WRITE_FLUSH(hw);
4398 }
4399
4400 /*
4401  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4402  * @hw: pointer to hardware structure
4403  */
4404 static void
4405 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4406 {
4407         uint32_t reg;
4408         uint32_t q;
4409
4410         PMD_INIT_FUNC_TRACE();
4411         /*PF VF Transmit Enable*/
4412         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4413         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4414
4415         /* Disable the Tx desc arbiter so that MTQC can be changed */
4416         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4417         reg |= IXGBE_RTTDCS_ARBDIS;
4418         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4419
4420         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4421         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4422
4423         /* Disable drop for all queues */
4424         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4425                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4426                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4427
4428         /* Enable the Tx desc arbiter */
4429         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4430         reg &= ~IXGBE_RTTDCS_ARBDIS;
4431         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4432
4433         IXGBE_WRITE_FLUSH(hw);
4434 }
4435
4436 static int __rte_cold
4437 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4438 {
4439         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4440         uint64_t dma_addr;
4441         unsigned int i;
4442
4443         /* Initialize software ring entries */
4444         for (i = 0; i < rxq->nb_rx_desc; i++) {
4445                 volatile union ixgbe_adv_rx_desc *rxd;
4446                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4447
4448                 if (mbuf == NULL) {
4449                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4450                                      (unsigned) rxq->queue_id);
4451                         return -ENOMEM;
4452                 }
4453
4454                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4455                 mbuf->port = rxq->port_id;
4456
4457                 dma_addr =
4458                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4459                 rxd = &rxq->rx_ring[i];
4460                 rxd->read.hdr_addr = 0;
4461                 rxd->read.pkt_addr = dma_addr;
4462                 rxe[i].mbuf = mbuf;
4463         }
4464
4465         return 0;
4466 }
4467
4468 static int
4469 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4470 {
4471         struct ixgbe_hw *hw;
4472         uint32_t mrqc;
4473
4474         ixgbe_rss_configure(dev);
4475
4476         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4477
4478         /* MRQC: enable VF RSS */
4479         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4480         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4481         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4482         case ETH_64_POOLS:
4483                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4484                 break;
4485
4486         case ETH_32_POOLS:
4487                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4488                 break;
4489
4490         default:
4491                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4492                 return -EINVAL;
4493         }
4494
4495         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4496
4497         return 0;
4498 }
4499
4500 static int
4501 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4502 {
4503         struct ixgbe_hw *hw =
4504                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4505
4506         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4507         case ETH_64_POOLS:
4508                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4509                         IXGBE_MRQC_VMDQEN);
4510                 break;
4511
4512         case ETH_32_POOLS:
4513                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4514                         IXGBE_MRQC_VMDQRT4TCEN);
4515                 break;
4516
4517         case ETH_16_POOLS:
4518                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4519                         IXGBE_MRQC_VMDQRT8TCEN);
4520                 break;
4521         default:
4522                 PMD_INIT_LOG(ERR,
4523                         "invalid pool number in IOV mode");
4524                 break;
4525         }
4526         return 0;
4527 }
4528
4529 static int
4530 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4531 {
4532         struct ixgbe_hw *hw =
4533                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4534
4535         if (hw->mac.type == ixgbe_mac_82598EB)
4536                 return 0;
4537
4538         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4539                 /*
4540                  * SRIOV inactive scheme
4541                  * any DCB/RSS w/o VMDq multi-queue setting
4542                  */
4543                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4544                 case ETH_MQ_RX_RSS:
4545                 case ETH_MQ_RX_DCB_RSS:
4546                 case ETH_MQ_RX_VMDQ_RSS:
4547                         ixgbe_rss_configure(dev);
4548                         break;
4549
4550                 case ETH_MQ_RX_VMDQ_DCB:
4551                         ixgbe_vmdq_dcb_configure(dev);
4552                         break;
4553
4554                 case ETH_MQ_RX_VMDQ_ONLY:
4555                         ixgbe_vmdq_rx_hw_configure(dev);
4556                         break;
4557
4558                 case ETH_MQ_RX_NONE:
4559                 default:
4560                         /* if mq_mode is none, disable rss mode.*/
4561                         ixgbe_rss_disable(dev);
4562                         break;
4563                 }
4564         } else {
4565                 /* SRIOV active scheme
4566                  * Support RSS together with SRIOV.
4567                  */
4568                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4569                 case ETH_MQ_RX_RSS:
4570                 case ETH_MQ_RX_VMDQ_RSS:
4571                         ixgbe_config_vf_rss(dev);
4572                         break;
4573                 case ETH_MQ_RX_VMDQ_DCB:
4574                 case ETH_MQ_RX_DCB:
4575                 /* In SRIOV, the configuration is the same as VMDq case */
4576                         ixgbe_vmdq_dcb_configure(dev);
4577                         break;
4578                 /* DCB/RSS together with SRIOV is not supported */
4579                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4580                 case ETH_MQ_RX_DCB_RSS:
4581                         PMD_INIT_LOG(ERR,
4582                                 "Could not support DCB/RSS with VMDq & SRIOV");
4583                         return -1;
4584                 default:
4585                         ixgbe_config_vf_default(dev);
4586                         break;
4587                 }
4588         }
4589
4590         return 0;
4591 }
4592
4593 static int
4594 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4595 {
4596         struct ixgbe_hw *hw =
4597                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4598         uint32_t mtqc;
4599         uint32_t rttdcs;
4600
4601         if (hw->mac.type == ixgbe_mac_82598EB)
4602                 return 0;
4603
4604         /* disable arbiter before setting MTQC */
4605         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4606         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4607         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4608
4609         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4610                 /*
4611                  * SRIOV inactive scheme
4612                  * any DCB w/o VMDq multi-queue setting
4613                  */
4614                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4615                         ixgbe_vmdq_tx_hw_configure(hw);
4616                 else {
4617                         mtqc = IXGBE_MTQC_64Q_1PB;
4618                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4619                 }
4620         } else {
4621                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4622
4623                 /*
4624                  * SRIOV active scheme
4625                  * FIXME if support DCB together with VMDq & SRIOV
4626                  */
4627                 case ETH_64_POOLS:
4628                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4629                         break;
4630                 case ETH_32_POOLS:
4631                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4632                         break;
4633                 case ETH_16_POOLS:
4634                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4635                                 IXGBE_MTQC_8TC_8TQ;
4636                         break;
4637                 default:
4638                         mtqc = IXGBE_MTQC_64Q_1PB;
4639                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4640                 }
4641                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4642         }
4643
4644         /* re-enable arbiter */
4645         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4646         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4647
4648         return 0;
4649 }
4650
4651 /**
4652  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4653  *
4654  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4655  * spec rev. 3.0 chapter 8.2.3.8.13.
4656  *
4657  * @pool Memory pool of the Rx queue
4658  */
4659 static inline uint32_t
4660 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4661 {
4662         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4663
4664         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4665         uint16_t maxdesc =
4666                 RTE_IPV4_MAX_PKT_LEN /
4667                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4668
4669         if (maxdesc >= 16)
4670                 return IXGBE_RSCCTL_MAXDESC_16;
4671         else if (maxdesc >= 8)
4672                 return IXGBE_RSCCTL_MAXDESC_8;
4673         else if (maxdesc >= 4)
4674                 return IXGBE_RSCCTL_MAXDESC_4;
4675         else
4676                 return IXGBE_RSCCTL_MAXDESC_1;
4677 }
4678
4679 /**
4680  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4681  * interrupt
4682  *
4683  * (Taken from FreeBSD tree)
4684  * (yes this is all very magic and confusing :)
4685  *
4686  * @dev port handle
4687  * @entry the register array entry
4688  * @vector the MSIX vector for this queue
4689  * @type RX/TX/MISC
4690  */
4691 static void
4692 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4693 {
4694         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4695         u32 ivar, index;
4696
4697         vector |= IXGBE_IVAR_ALLOC_VAL;
4698
4699         switch (hw->mac.type) {
4700
4701         case ixgbe_mac_82598EB:
4702                 if (type == -1)
4703                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4704                 else
4705                         entry += (type * 64);
4706                 index = (entry >> 2) & 0x1F;
4707                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4708                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4709                 ivar |= (vector << (8 * (entry & 0x3)));
4710                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4711                 break;
4712
4713         case ixgbe_mac_82599EB:
4714         case ixgbe_mac_X540:
4715                 if (type == -1) { /* MISC IVAR */
4716                         index = (entry & 1) * 8;
4717                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4718                         ivar &= ~(0xFF << index);
4719                         ivar |= (vector << index);
4720                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4721                 } else {        /* RX/TX IVARS */
4722                         index = (16 * (entry & 1)) + (8 * type);
4723                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4724                         ivar &= ~(0xFF << index);
4725                         ivar |= (vector << index);
4726                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4727                 }
4728
4729                 break;
4730
4731         default:
4732                 break;
4733         }
4734 }
4735
4736 void __rte_cold
4737 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4738 {
4739         uint16_t i, rx_using_sse;
4740         struct ixgbe_adapter *adapter = dev->data->dev_private;
4741
4742         /*
4743          * In order to allow Vector Rx there are a few configuration
4744          * conditions to be met and Rx Bulk Allocation should be allowed.
4745          */
4746         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4747             !adapter->rx_bulk_alloc_allowed) {
4748                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4749                                     "preconditions",
4750                              dev->data->port_id);
4751
4752                 adapter->rx_vec_allowed = false;
4753         }
4754
4755         /*
4756          * Initialize the appropriate LRO callback.
4757          *
4758          * If all queues satisfy the bulk allocation preconditions
4759          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4760          * Otherwise use a single allocation version.
4761          */
4762         if (dev->data->lro) {
4763                 if (adapter->rx_bulk_alloc_allowed) {
4764                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4765                                            "allocation version");
4766                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4767                 } else {
4768                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4769                                            "allocation version");
4770                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4771                 }
4772         } else if (dev->data->scattered_rx) {
4773                 /*
4774                  * Set the non-LRO scattered callback: there are Vector and
4775                  * single allocation versions.
4776                  */
4777                 if (adapter->rx_vec_allowed) {
4778                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4779                                             "callback (port=%d).",
4780                                      dev->data->port_id);
4781
4782                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4783                 } else if (adapter->rx_bulk_alloc_allowed) {
4784                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4785                                            "allocation callback (port=%d).",
4786                                      dev->data->port_id);
4787                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4788                 } else {
4789                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4790                                             "single allocation) "
4791                                             "Scattered Rx callback "
4792                                             "(port=%d).",
4793                                      dev->data->port_id);
4794
4795                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4796                 }
4797         /*
4798          * Below we set "simple" callbacks according to port/queues parameters.
4799          * If parameters allow we are going to choose between the following
4800          * callbacks:
4801          *    - Vector
4802          *    - Bulk Allocation
4803          *    - Single buffer allocation (the simplest one)
4804          */
4805         } else if (adapter->rx_vec_allowed) {
4806                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4807                                     "burst size no less than %d (port=%d).",
4808                              RTE_IXGBE_DESCS_PER_LOOP,
4809                              dev->data->port_id);
4810
4811                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4812         } else if (adapter->rx_bulk_alloc_allowed) {
4813                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4814                                     "satisfied. Rx Burst Bulk Alloc function "
4815                                     "will be used on port=%d.",
4816                              dev->data->port_id);
4817
4818                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4819         } else {
4820                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4821                                     "satisfied, or Scattered Rx is requested "
4822                                     "(port=%d).",
4823                              dev->data->port_id);
4824
4825                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4826         }
4827
4828         /* Propagate information about RX function choice through all queues. */
4829
4830         rx_using_sse =
4831                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4832                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4833
4834         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4835                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4836
4837                 rxq->rx_using_sse = rx_using_sse;
4838 #ifdef RTE_LIBRTE_SECURITY
4839                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4840                                 DEV_RX_OFFLOAD_SECURITY);
4841 #endif
4842         }
4843 }
4844
4845 /**
4846  * ixgbe_set_rsc - configure RSC related port HW registers
4847  *
4848  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4849  * of 82599 Spec (x540 configuration is virtually the same).
4850  *
4851  * @dev port handle
4852  *
4853  * Returns 0 in case of success or a non-zero error code
4854  */
4855 static int
4856 ixgbe_set_rsc(struct rte_eth_dev *dev)
4857 {
4858         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4859         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4860         struct rte_eth_dev_info dev_info = { 0 };
4861         bool rsc_capable = false;
4862         uint16_t i;
4863         uint32_t rdrxctl;
4864         uint32_t rfctl;
4865
4866         /* Sanity check */
4867         dev->dev_ops->dev_infos_get(dev, &dev_info);
4868         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4869                 rsc_capable = true;
4870
4871         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4872                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4873                                    "support it");
4874                 return -EINVAL;
4875         }
4876
4877         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4878
4879         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4880              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4881                 /*
4882                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4883                  * 3.0 RSC configuration requires HW CRC stripping being
4884                  * enabled. If user requested both HW CRC stripping off
4885                  * and RSC on - return an error.
4886                  */
4887                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4888                                     "is disabled");
4889                 return -EINVAL;
4890         }
4891
4892         /* RFCTL configuration  */
4893         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4894         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4895                 /*
4896                  * Since NFS packets coalescing is not supported - clear
4897                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4898                  * enabled.
4899                  */
4900                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4901                            IXGBE_RFCTL_NFSR_DIS);
4902         else
4903                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4904         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4905
4906         /* If LRO hasn't been requested - we are done here. */
4907         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4908                 return 0;
4909
4910         /* Set RDRXCTL.RSCACKC bit */
4911         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4912         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4913         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4914
4915         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4916         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4917                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4918                 uint32_t srrctl =
4919                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4920                 uint32_t rscctl =
4921                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4922                 uint32_t psrtype =
4923                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4924                 uint32_t eitr =
4925                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4926
4927                 /*
4928                  * ixgbe PMD doesn't support header-split at the moment.
4929                  *
4930                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4931                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4932                  * should be configured even if header split is not
4933                  * enabled. We will configure it 128 bytes following the
4934                  * recommendation in the spec.
4935                  */
4936                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4937                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4938                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4939
4940                 /*
4941                  * TODO: Consider setting the Receive Descriptor Minimum
4942                  * Threshold Size for an RSC case. This is not an obviously
4943                  * beneficiary option but the one worth considering...
4944                  */
4945
4946                 rscctl |= IXGBE_RSCCTL_RSCEN;
4947                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4948                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4949
4950                 /*
4951                  * RSC: Set ITR interval corresponding to 2K ints/s.
4952                  *
4953                  * Full-sized RSC aggregations for a 10Gb/s link will
4954                  * arrive at about 20K aggregation/s rate.
4955                  *
4956                  * 2K inst/s rate will make only 10% of the
4957                  * aggregations to be closed due to the interrupt timer
4958                  * expiration for a streaming at wire-speed case.
4959                  *
4960                  * For a sparse streaming case this setting will yield
4961                  * at most 500us latency for a single RSC aggregation.
4962                  */
4963                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4964                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4965                 eitr |= IXGBE_EITR_CNT_WDIS;
4966
4967                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4968                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4969                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4970                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4971
4972                 /*
4973                  * RSC requires the mapping of the queue to the
4974                  * interrupt vector.
4975                  */
4976                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4977         }
4978
4979         dev->data->lro = 1;
4980
4981         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4982
4983         return 0;
4984 }
4985
4986 /*
4987  * Initializes Receive Unit.
4988  */
4989 int __rte_cold
4990 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4991 {
4992         struct ixgbe_hw     *hw;
4993         struct ixgbe_rx_queue *rxq;
4994         uint64_t bus_addr;
4995         uint32_t rxctrl;
4996         uint32_t fctrl;
4997         uint32_t hlreg0;
4998         uint32_t maxfrs;
4999         uint32_t srrctl;
5000         uint32_t rdrxctl;
5001         uint32_t rxcsum;
5002         uint16_t buf_size;
5003         uint16_t i;
5004         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5005         int rc;
5006
5007         PMD_INIT_FUNC_TRACE();
5008         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5009
5010         /*
5011          * Make sure receives are disabled while setting
5012          * up the RX context (registers, descriptor rings, etc.).
5013          */
5014         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5015         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5016
5017         /* Enable receipt of broadcasted frames */
5018         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5019         fctrl |= IXGBE_FCTRL_BAM;
5020         fctrl |= IXGBE_FCTRL_DPF;
5021         fctrl |= IXGBE_FCTRL_PMCF;
5022         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5023
5024         /*
5025          * Configure CRC stripping, if any.
5026          */
5027         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5028         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5029                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5030         else
5031                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5032
5033         /*
5034          * Configure jumbo frame support, if any.
5035          */
5036         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5037                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5038                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5039                 maxfrs &= 0x0000FFFF;
5040                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5041                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5042         } else
5043                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5044
5045         /*
5046          * If loopback mode is configured, set LPBK bit.
5047          */
5048         if (dev->data->dev_conf.lpbk_mode != 0) {
5049                 rc = ixgbe_check_supported_loopback_mode(dev);
5050                 if (rc < 0) {
5051                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5052                         return rc;
5053                 }
5054                 hlreg0 |= IXGBE_HLREG0_LPBK;
5055         } else {
5056                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5057         }
5058
5059         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5060
5061         /*
5062          * Assume no header split and no VLAN strip support
5063          * on any Rx queue first .
5064          */
5065         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5066         /* Setup RX queues */
5067         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5068                 rxq = dev->data->rx_queues[i];
5069
5070                 /*
5071                  * Reset crc_len in case it was changed after queue setup by a
5072                  * call to configure.
5073                  */
5074                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5075                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5076                 else
5077                         rxq->crc_len = 0;
5078
5079                 /* Setup the Base and Length of the Rx Descriptor Rings */
5080                 bus_addr = rxq->rx_ring_phys_addr;
5081                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5082                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5083                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5084                                 (uint32_t)(bus_addr >> 32));
5085                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5086                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5087                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5088                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5089
5090                 /* Configure the SRRCTL register */
5091                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5092
5093                 /* Set if packets are dropped when no descriptors available */
5094                 if (rxq->drop_en)
5095                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5096
5097                 /*
5098                  * Configure the RX buffer size in the BSIZEPACKET field of
5099                  * the SRRCTL register of the queue.
5100                  * The value is in 1 KB resolution. Valid values can be from
5101                  * 1 KB to 16 KB.
5102                  */
5103                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5104                         RTE_PKTMBUF_HEADROOM);
5105                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5106                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5107
5108                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5109
5110                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5111                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5112
5113                 /* It adds dual VLAN length for supporting dual VLAN */
5114                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5115                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5116                         dev->data->scattered_rx = 1;
5117                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5118                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5119         }
5120
5121         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5122                 dev->data->scattered_rx = 1;
5123
5124         /*
5125          * Device configured with multiple RX queues.
5126          */
5127         ixgbe_dev_mq_rx_configure(dev);
5128
5129         /*
5130          * Setup the Checksum Register.
5131          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5132          * Enable IP/L4 checkum computation by hardware if requested to do so.
5133          */
5134         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5135         rxcsum |= IXGBE_RXCSUM_PCSD;
5136         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5137                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5138         else
5139                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5140
5141         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5142
5143         if (hw->mac.type == ixgbe_mac_82599EB ||
5144             hw->mac.type == ixgbe_mac_X540) {
5145                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5146                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5147                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5148                 else
5149                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5150                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5151                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5152         }
5153
5154         rc = ixgbe_set_rsc(dev);
5155         if (rc)
5156                 return rc;
5157
5158         ixgbe_set_rx_function(dev);
5159
5160         return 0;
5161 }
5162
5163 /*
5164  * Initializes Transmit Unit.
5165  */
5166 void __rte_cold
5167 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5168 {
5169         struct ixgbe_hw     *hw;
5170         struct ixgbe_tx_queue *txq;
5171         uint64_t bus_addr;
5172         uint32_t hlreg0;
5173         uint32_t txctrl;
5174         uint16_t i;
5175
5176         PMD_INIT_FUNC_TRACE();
5177         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5178
5179         /* Enable TX CRC (checksum offload requirement) and hw padding
5180          * (TSO requirement)
5181          */
5182         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5183         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5184         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5185
5186         /* Setup the Base and Length of the Tx Descriptor Rings */
5187         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5188                 txq = dev->data->tx_queues[i];
5189
5190                 bus_addr = txq->tx_ring_phys_addr;
5191                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5192                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5193                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5194                                 (uint32_t)(bus_addr >> 32));
5195                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5196                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5197                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5198                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5199                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5200
5201                 /*
5202                  * Disable Tx Head Writeback RO bit, since this hoses
5203                  * bookkeeping if things aren't delivered in order.
5204                  */
5205                 switch (hw->mac.type) {
5206                 case ixgbe_mac_82598EB:
5207                         txctrl = IXGBE_READ_REG(hw,
5208                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5209                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5210                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5211                                         txctrl);
5212                         break;
5213
5214                 case ixgbe_mac_82599EB:
5215                 case ixgbe_mac_X540:
5216                 case ixgbe_mac_X550:
5217                 case ixgbe_mac_X550EM_x:
5218                 case ixgbe_mac_X550EM_a:
5219                 default:
5220                         txctrl = IXGBE_READ_REG(hw,
5221                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5222                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5223                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5224                                         txctrl);
5225                         break;
5226                 }
5227         }
5228
5229         /* Device configured with multiple TX queues. */
5230         ixgbe_dev_mq_tx_configure(dev);
5231 }
5232
5233 /*
5234  * Check if requested loopback mode is supported
5235  */
5236 int
5237 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5238 {
5239         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5240
5241         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5242                 if (hw->mac.type == ixgbe_mac_82599EB ||
5243                      hw->mac.type == ixgbe_mac_X540 ||
5244                      hw->mac.type == ixgbe_mac_X550 ||
5245                      hw->mac.type == ixgbe_mac_X550EM_x ||
5246                      hw->mac.type == ixgbe_mac_X550EM_a)
5247                         return 0;
5248
5249         return -ENOTSUP;
5250 }
5251
5252 /*
5253  * Set up link for 82599 loopback mode Tx->Rx.
5254  */
5255 static inline void __rte_cold
5256 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5257 {
5258         PMD_INIT_FUNC_TRACE();
5259
5260         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5261                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5262                                 IXGBE_SUCCESS) {
5263                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5264                         /* ignore error */
5265                         return;
5266                 }
5267         }
5268
5269         /* Restart link */
5270         IXGBE_WRITE_REG(hw,
5271                         IXGBE_AUTOC,
5272                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5273         ixgbe_reset_pipeline_82599(hw);
5274
5275         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5276         msec_delay(50);
5277 }
5278
5279
5280 /*
5281  * Start Transmit and Receive Units.
5282  */
5283 int __rte_cold
5284 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5285 {
5286         struct ixgbe_hw     *hw;
5287         struct ixgbe_tx_queue *txq;
5288         struct ixgbe_rx_queue *rxq;
5289         uint32_t txdctl;
5290         uint32_t dmatxctl;
5291         uint32_t rxctrl;
5292         uint16_t i;
5293         int ret = 0;
5294
5295         PMD_INIT_FUNC_TRACE();
5296         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5297
5298         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5299                 txq = dev->data->tx_queues[i];
5300                 /* Setup Transmit Threshold Registers */
5301                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5302                 txdctl |= txq->pthresh & 0x7F;
5303                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5304                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5305                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5306         }
5307
5308         if (hw->mac.type != ixgbe_mac_82598EB) {
5309                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5310                 dmatxctl |= IXGBE_DMATXCTL_TE;
5311                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5312         }
5313
5314         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5315                 txq = dev->data->tx_queues[i];
5316                 if (!txq->tx_deferred_start) {
5317                         ret = ixgbe_dev_tx_queue_start(dev, i);
5318                         if (ret < 0)
5319                                 return ret;
5320                 }
5321         }
5322
5323         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5324                 rxq = dev->data->rx_queues[i];
5325                 if (!rxq->rx_deferred_start) {
5326                         ret = ixgbe_dev_rx_queue_start(dev, i);
5327                         if (ret < 0)
5328                                 return ret;
5329                 }
5330         }
5331
5332         /* Enable Receive engine */
5333         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5334         if (hw->mac.type == ixgbe_mac_82598EB)
5335                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5336         rxctrl |= IXGBE_RXCTRL_RXEN;
5337         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5338
5339         /* If loopback mode is enabled, set up the link accordingly */
5340         if (dev->data->dev_conf.lpbk_mode != 0) {
5341                 if (hw->mac.type == ixgbe_mac_82599EB)
5342                         ixgbe_setup_loopback_link_82599(hw);
5343                 else if (hw->mac.type == ixgbe_mac_X540 ||
5344                      hw->mac.type == ixgbe_mac_X550 ||
5345                      hw->mac.type == ixgbe_mac_X550EM_x ||
5346                      hw->mac.type == ixgbe_mac_X550EM_a)
5347                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5348         }
5349
5350 #ifdef RTE_LIBRTE_SECURITY
5351         if ((dev->data->dev_conf.rxmode.offloads &
5352                         DEV_RX_OFFLOAD_SECURITY) ||
5353                 (dev->data->dev_conf.txmode.offloads &
5354                         DEV_TX_OFFLOAD_SECURITY)) {
5355                 ret = ixgbe_crypto_enable_ipsec(dev);
5356                 if (ret != 0) {
5357                         PMD_DRV_LOG(ERR,
5358                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5359                                     ret);
5360                         return ret;
5361                 }
5362         }
5363 #endif
5364
5365         return 0;
5366 }
5367
5368 /*
5369  * Start Receive Units for specified queue.
5370  */
5371 int __rte_cold
5372 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5373 {
5374         struct ixgbe_hw     *hw;
5375         struct ixgbe_rx_queue *rxq;
5376         uint32_t rxdctl;
5377         int poll_ms;
5378
5379         PMD_INIT_FUNC_TRACE();
5380         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5381
5382         rxq = dev->data->rx_queues[rx_queue_id];
5383
5384         /* Allocate buffers for descriptor rings */
5385         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5386                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5387                              rx_queue_id);
5388                 return -1;
5389         }
5390         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5391         rxdctl |= IXGBE_RXDCTL_ENABLE;
5392         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5393
5394         /* Wait until RX Enable ready */
5395         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5396         do {
5397                 rte_delay_ms(1);
5398                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5399         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5400         if (!poll_ms)
5401                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5402         rte_wmb();
5403         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5404         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5405         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5406
5407         return 0;
5408 }
5409
5410 /*
5411  * Stop Receive Units for specified queue.
5412  */
5413 int __rte_cold
5414 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5415 {
5416         struct ixgbe_hw     *hw;
5417         struct ixgbe_adapter *adapter = dev->data->dev_private;
5418         struct ixgbe_rx_queue *rxq;
5419         uint32_t rxdctl;
5420         int poll_ms;
5421
5422         PMD_INIT_FUNC_TRACE();
5423         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5424
5425         rxq = dev->data->rx_queues[rx_queue_id];
5426
5427         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5428         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5429         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5430
5431         /* Wait until RX Enable bit clear */
5432         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5433         do {
5434                 rte_delay_ms(1);
5435                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5436         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5437         if (!poll_ms)
5438                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5439
5440         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5441
5442         ixgbe_rx_queue_release_mbufs(rxq);
5443         ixgbe_reset_rx_queue(adapter, rxq);
5444         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5445
5446         return 0;
5447 }
5448
5449
5450 /*
5451  * Start Transmit Units for specified queue.
5452  */
5453 int __rte_cold
5454 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5455 {
5456         struct ixgbe_hw     *hw;
5457         struct ixgbe_tx_queue *txq;
5458         uint32_t txdctl;
5459         int poll_ms;
5460
5461         PMD_INIT_FUNC_TRACE();
5462         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5463
5464         txq = dev->data->tx_queues[tx_queue_id];
5465         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5466         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5467         txdctl |= IXGBE_TXDCTL_ENABLE;
5468         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5469
5470         /* Wait until TX Enable ready */
5471         if (hw->mac.type == ixgbe_mac_82599EB) {
5472                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5473                 do {
5474                         rte_delay_ms(1);
5475                         txdctl = IXGBE_READ_REG(hw,
5476                                 IXGBE_TXDCTL(txq->reg_idx));
5477                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5478                 if (!poll_ms)
5479                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5480                                 tx_queue_id);
5481         }
5482         rte_wmb();
5483         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5484         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5485
5486         return 0;
5487 }
5488
5489 /*
5490  * Stop Transmit Units for specified queue.
5491  */
5492 int __rte_cold
5493 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5494 {
5495         struct ixgbe_hw     *hw;
5496         struct ixgbe_tx_queue *txq;
5497         uint32_t txdctl;
5498         uint32_t txtdh, txtdt;
5499         int poll_ms;
5500
5501         PMD_INIT_FUNC_TRACE();
5502         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5503
5504         txq = dev->data->tx_queues[tx_queue_id];
5505
5506         /* Wait until TX queue is empty */
5507         if (hw->mac.type == ixgbe_mac_82599EB) {
5508                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5509                 do {
5510                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5511                         txtdh = IXGBE_READ_REG(hw,
5512                                                IXGBE_TDH(txq->reg_idx));
5513                         txtdt = IXGBE_READ_REG(hw,
5514                                                IXGBE_TDT(txq->reg_idx));
5515                 } while (--poll_ms && (txtdh != txtdt));
5516                 if (!poll_ms)
5517                         PMD_INIT_LOG(ERR,
5518                                 "Tx Queue %d is not empty when stopping.",
5519                                 tx_queue_id);
5520         }
5521
5522         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5523         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5524         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5525
5526         /* Wait until TX Enable bit clear */
5527         if (hw->mac.type == ixgbe_mac_82599EB) {
5528                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5529                 do {
5530                         rte_delay_ms(1);
5531                         txdctl = IXGBE_READ_REG(hw,
5532                                                 IXGBE_TXDCTL(txq->reg_idx));
5533                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5534                 if (!poll_ms)
5535                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5536                                 tx_queue_id);
5537         }
5538
5539         if (txq->ops != NULL) {
5540                 txq->ops->release_mbufs(txq);
5541                 txq->ops->reset(txq);
5542         }
5543         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5544
5545         return 0;
5546 }
5547
5548 void
5549 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5550         struct rte_eth_rxq_info *qinfo)
5551 {
5552         struct ixgbe_rx_queue *rxq;
5553
5554         rxq = dev->data->rx_queues[queue_id];
5555
5556         qinfo->mp = rxq->mb_pool;
5557         qinfo->scattered_rx = dev->data->scattered_rx;
5558         qinfo->nb_desc = rxq->nb_rx_desc;
5559
5560         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5561         qinfo->conf.rx_drop_en = rxq->drop_en;
5562         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5563         qinfo->conf.offloads = rxq->offloads;
5564 }
5565
5566 void
5567 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5568         struct rte_eth_txq_info *qinfo)
5569 {
5570         struct ixgbe_tx_queue *txq;
5571
5572         txq = dev->data->tx_queues[queue_id];
5573
5574         qinfo->nb_desc = txq->nb_tx_desc;
5575
5576         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5577         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5578         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5579
5580         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5581         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5582         qinfo->conf.offloads = txq->offloads;
5583         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5584 }
5585
5586 /*
5587  * [VF] Initializes Receive Unit.
5588  */
5589 int __rte_cold
5590 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5591 {
5592         struct ixgbe_hw     *hw;
5593         struct ixgbe_rx_queue *rxq;
5594         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5595         uint64_t bus_addr;
5596         uint32_t srrctl, psrtype = 0;
5597         uint16_t buf_size;
5598         uint16_t i;
5599         int ret;
5600
5601         PMD_INIT_FUNC_TRACE();
5602         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5603
5604         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5605                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5606                         "it should be power of 2");
5607                 return -1;
5608         }
5609
5610         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5611                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5612                         "it should be equal to or less than %d",
5613                         hw->mac.max_rx_queues);
5614                 return -1;
5615         }
5616
5617         /*
5618          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5619          * disables the VF receipt of packets if the PF MTU is > 1500.
5620          * This is done to deal with 82599 limitations that imposes
5621          * the PF and all VFs to share the same MTU.
5622          * Then, the PF driver enables again the VF receipt of packet when
5623          * the VF driver issues a IXGBE_VF_SET_LPE request.
5624          * In the meantime, the VF device cannot be used, even if the VF driver
5625          * and the Guest VM network stack are ready to accept packets with a
5626          * size up to the PF MTU.
5627          * As a work-around to this PF behaviour, force the call to
5628          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5629          * VF packets received can work in all cases.
5630          */
5631         ixgbevf_rlpml_set_vf(hw,
5632                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5633
5634         /*
5635          * Assume no header split and no VLAN strip support
5636          * on any Rx queue first .
5637          */
5638         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5639         /* Setup RX queues */
5640         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5641                 rxq = dev->data->rx_queues[i];
5642
5643                 /* Allocate buffers for descriptor rings */
5644                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5645                 if (ret)
5646                         return ret;
5647
5648                 /* Setup the Base and Length of the Rx Descriptor Rings */
5649                 bus_addr = rxq->rx_ring_phys_addr;
5650
5651                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5652                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5653                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5654                                 (uint32_t)(bus_addr >> 32));
5655                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5656                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5657                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5658                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5659
5660
5661                 /* Configure the SRRCTL register */
5662                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5663
5664                 /* Set if packets are dropped when no descriptors available */
5665                 if (rxq->drop_en)
5666                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5667
5668                 /*
5669                  * Configure the RX buffer size in the BSIZEPACKET field of
5670                  * the SRRCTL register of the queue.
5671                  * The value is in 1 KB resolution. Valid values can be from
5672                  * 1 KB to 16 KB.
5673                  */
5674                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5675                         RTE_PKTMBUF_HEADROOM);
5676                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5677                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5678
5679                 /*
5680                  * VF modification to write virtual function SRRCTL register
5681                  */
5682                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5683
5684                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5685                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5686
5687                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5688                     /* It adds dual VLAN length for supporting dual VLAN */
5689                     (rxmode->max_rx_pkt_len +
5690                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5691                         if (!dev->data->scattered_rx)
5692                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5693                         dev->data->scattered_rx = 1;
5694                 }
5695
5696                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5697                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5698         }
5699
5700         /* Set RQPL for VF RSS according to max Rx queue */
5701         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5702                 IXGBE_PSRTYPE_RQPL_SHIFT;
5703         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5704
5705         ixgbe_set_rx_function(dev);
5706
5707         return 0;
5708 }
5709
5710 /*
5711  * [VF] Initializes Transmit Unit.
5712  */
5713 void __rte_cold
5714 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5715 {
5716         struct ixgbe_hw     *hw;
5717         struct ixgbe_tx_queue *txq;
5718         uint64_t bus_addr;
5719         uint32_t txctrl;
5720         uint16_t i;
5721
5722         PMD_INIT_FUNC_TRACE();
5723         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5724
5725         /* Setup the Base and Length of the Tx Descriptor Rings */
5726         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5727                 txq = dev->data->tx_queues[i];
5728                 bus_addr = txq->tx_ring_phys_addr;
5729                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5730                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5731                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5732                                 (uint32_t)(bus_addr >> 32));
5733                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5734                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5735                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5736                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5737                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5738
5739                 /*
5740                  * Disable Tx Head Writeback RO bit, since this hoses
5741                  * bookkeeping if things aren't delivered in order.
5742                  */
5743                 txctrl = IXGBE_READ_REG(hw,
5744                                 IXGBE_VFDCA_TXCTRL(i));
5745                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5746                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5747                                 txctrl);
5748         }
5749 }
5750
5751 /*
5752  * [VF] Start Transmit and Receive Units.
5753  */
5754 void __rte_cold
5755 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5756 {
5757         struct ixgbe_hw     *hw;
5758         struct ixgbe_tx_queue *txq;
5759         struct ixgbe_rx_queue *rxq;
5760         uint32_t txdctl;
5761         uint32_t rxdctl;
5762         uint16_t i;
5763         int poll_ms;
5764
5765         PMD_INIT_FUNC_TRACE();
5766         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5767
5768         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5769                 txq = dev->data->tx_queues[i];
5770                 /* Setup Transmit Threshold Registers */
5771                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5772                 txdctl |= txq->pthresh & 0x7F;
5773                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5774                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5775                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5776         }
5777
5778         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5779
5780                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5781                 txdctl |= IXGBE_TXDCTL_ENABLE;
5782                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5783
5784                 poll_ms = 10;
5785                 /* Wait until TX Enable ready */
5786                 do {
5787                         rte_delay_ms(1);
5788                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5789                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5790                 if (!poll_ms)
5791                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5792         }
5793         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5794
5795                 rxq = dev->data->rx_queues[i];
5796
5797                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5798                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5799                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5800
5801                 /* Wait until RX Enable ready */
5802                 poll_ms = 10;
5803                 do {
5804                         rte_delay_ms(1);
5805                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5806                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5807                 if (!poll_ms)
5808                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5809                 rte_wmb();
5810                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5811
5812         }
5813 }
5814
5815 int
5816 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5817                     const struct rte_flow_action_rss *in)
5818 {
5819         if (in->key_len > RTE_DIM(out->key) ||
5820             in->queue_num > RTE_DIM(out->queue))
5821                 return -EINVAL;
5822         out->conf = (struct rte_flow_action_rss){
5823                 .func = in->func,
5824                 .level = in->level,
5825                 .types = in->types,
5826                 .key_len = in->key_len,
5827                 .queue_num = in->queue_num,
5828                 .key = memcpy(out->key, in->key, in->key_len),
5829                 .queue = memcpy(out->queue, in->queue,
5830                                 sizeof(*in->queue) * in->queue_num),
5831         };
5832         return 0;
5833 }
5834
5835 int
5836 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5837                       const struct rte_flow_action_rss *with)
5838 {
5839         return (comp->func == with->func &&
5840                 comp->level == with->level &&
5841                 comp->types == with->types &&
5842                 comp->key_len == with->key_len &&
5843                 comp->queue_num == with->queue_num &&
5844                 !memcmp(comp->key, with->key, with->key_len) &&
5845                 !memcmp(comp->queue, with->queue,
5846                         sizeof(*with->queue) * with->queue_num));
5847 }
5848
5849 int
5850 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5851                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5852 {
5853         struct ixgbe_hw *hw;
5854         uint32_t reta;
5855         uint16_t i;
5856         uint16_t j;
5857         uint16_t sp_reta_size;
5858         uint32_t reta_reg;
5859         struct rte_eth_rss_conf rss_conf = {
5860                 .rss_key = conf->conf.key_len ?
5861                         (void *)(uintptr_t)conf->conf.key : NULL,
5862                 .rss_key_len = conf->conf.key_len,
5863                 .rss_hf = conf->conf.types,
5864         };
5865         struct ixgbe_filter_info *filter_info =
5866                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5867
5868         PMD_INIT_FUNC_TRACE();
5869         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5870
5871         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5872
5873         if (!add) {
5874                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5875                                           &conf->conf)) {
5876                         ixgbe_rss_disable(dev);
5877                         memset(&filter_info->rss_info, 0,
5878                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5879                         return 0;
5880                 }
5881                 return -EINVAL;
5882         }
5883
5884         if (filter_info->rss_info.conf.queue_num)
5885                 return -EINVAL;
5886         /* Fill in redirection table
5887          * The byte-swap is needed because NIC registers are in
5888          * little-endian order.
5889          */
5890         reta = 0;
5891         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5892                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5893
5894                 if (j == conf->conf.queue_num)
5895                         j = 0;
5896                 reta = (reta << 8) | conf->conf.queue[j];
5897                 if ((i & 3) == 3)
5898                         IXGBE_WRITE_REG(hw, reta_reg,
5899                                         rte_bswap32(reta));
5900         }
5901
5902         /* Configure the RSS key and the RSS protocols used to compute
5903          * the RSS hash of input packets.
5904          */
5905         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5906                 ixgbe_rss_disable(dev);
5907                 return 0;
5908         }
5909         if (rss_conf.rss_key == NULL)
5910                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5911         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5912
5913         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5914                 return -EINVAL;
5915
5916         return 0;
5917 }
5918
5919 /* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
5920 #if defined(RTE_ARCH_PPC_64)
5921 int
5922 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5923 {
5924         return -1;
5925 }
5926
5927 uint16_t
5928 ixgbe_recv_pkts_vec(
5929         void __rte_unused *rx_queue,
5930         struct rte_mbuf __rte_unused **rx_pkts,
5931         uint16_t __rte_unused nb_pkts)
5932 {
5933         return 0;
5934 }
5935
5936 uint16_t
5937 ixgbe_recv_scattered_pkts_vec(
5938         void __rte_unused *rx_queue,
5939         struct rte_mbuf __rte_unused **rx_pkts,
5940         uint16_t __rte_unused nb_pkts)
5941 {
5942         return 0;
5943 }
5944
5945 int
5946 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5947 {
5948         return -1;
5949 }
5950
5951 uint16_t
5952 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5953                 struct rte_mbuf __rte_unused **tx_pkts,
5954                 uint16_t __rte_unused nb_pkts)
5955 {
5956         return 0;
5957 }
5958
5959 int
5960 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5961 {
5962         return -1;
5963 }
5964
5965 void
5966 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5967 {
5968         return;
5969 }
5970 #endif