net/bnxt: fix unnecessary delay in port stop
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit Mask to indicate what bits required for building TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |     \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
76
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 /*********************************************************************
91  *
92  *  TX functions
93  *
94  **********************************************************************/
95
96 /*
97  * Check for descriptors with their DD bit set and free mbufs.
98  * Return the total number of buffers freed.
99  */
100 static __rte_always_inline int
101 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
102 {
103         struct ixgbe_tx_entry *txep;
104         uint32_t status;
105         int i, nb_free = 0;
106         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
107
108         /* check DD bit on threshold descriptor */
109         status = txq->tx_ring[txq->tx_next_dd].wb.status;
110         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
111                 return 0;
112
113         /*
114          * first buffer to free from S/W ring is at index
115          * tx_next_dd - (tx_rs_thresh-1)
116          */
117         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
118
119         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
120                 /* free buffers one at a time */
121                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
122                 txep->mbuf = NULL;
123
124                 if (unlikely(m == NULL))
125                         continue;
126
127                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
128                     (nb_free > 0 && m->pool != free[0]->pool)) {
129                         rte_mempool_put_bulk(free[0]->pool,
130                                              (void **)free, nb_free);
131                         nb_free = 0;
132                 }
133
134                 free[nb_free++] = m;
135         }
136
137         if (nb_free > 0)
138                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
139
140         /* buffers were freed, update counters */
141         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
142         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
143         if (txq->tx_next_dd >= txq->nb_tx_desc)
144                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
145
146         return txq->tx_rs_thresh;
147 }
148
149 /* Populate 4 descriptors with data from 4 mbufs */
150 static inline void
151 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
152 {
153         uint64_t buf_dma_addr;
154         uint32_t pkt_len;
155         int i;
156
157         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
158                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
159                 pkt_len = (*pkts)->data_len;
160
161                 /* write data to descriptor */
162                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
163
164                 txdp->read.cmd_type_len =
165                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
166
167                 txdp->read.olinfo_status =
168                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
169
170                 rte_prefetch0(&(*pkts)->pool);
171         }
172 }
173
174 /* Populate 1 descriptor with data from 1 mbuf */
175 static inline void
176 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
177 {
178         uint64_t buf_dma_addr;
179         uint32_t pkt_len;
180
181         buf_dma_addr = rte_mbuf_data_iova(*pkts);
182         pkt_len = (*pkts)->data_len;
183
184         /* write data to descriptor */
185         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
186         txdp->read.cmd_type_len =
187                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
188         txdp->read.olinfo_status =
189                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
190         rte_prefetch0(&(*pkts)->pool);
191 }
192
193 /*
194  * Fill H/W descriptor ring with mbuf data.
195  * Copy mbuf pointers to the S/W ring.
196  */
197 static inline void
198 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
199                       uint16_t nb_pkts)
200 {
201         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
202         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
203         const int N_PER_LOOP = 4;
204         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
205         int mainpart, leftover;
206         int i, j;
207
208         /*
209          * Process most of the packets in chunks of N pkts.  Any
210          * leftover packets will get processed one at a time.
211          */
212         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
213         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
214         for (i = 0; i < mainpart; i += N_PER_LOOP) {
215                 /* Copy N mbuf pointers to the S/W ring */
216                 for (j = 0; j < N_PER_LOOP; ++j) {
217                         (txep + i + j)->mbuf = *(pkts + i + j);
218                 }
219                 tx4(txdp + i, pkts + i);
220         }
221
222         if (unlikely(leftover > 0)) {
223                 for (i = 0; i < leftover; ++i) {
224                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
225                         tx1(txdp + mainpart + i, pkts + mainpart + i);
226                 }
227         }
228 }
229
230 static inline uint16_t
231 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
232              uint16_t nb_pkts)
233 {
234         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
235         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
236         uint16_t n = 0;
237
238         /*
239          * Begin scanning the H/W ring for done descriptors when the
240          * number of available descriptors drops below tx_free_thresh.  For
241          * each done descriptor, free the associated buffer.
242          */
243         if (txq->nb_tx_free < txq->tx_free_thresh)
244                 ixgbe_tx_free_bufs(txq);
245
246         /* Only use descriptors that are available */
247         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
248         if (unlikely(nb_pkts == 0))
249                 return 0;
250
251         /* Use exactly nb_pkts descriptors */
252         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
253
254         /*
255          * At this point, we know there are enough descriptors in the
256          * ring to transmit all the packets.  This assumes that each
257          * mbuf contains a single segment, and that no new offloads
258          * are expected, which would require a new context descriptor.
259          */
260
261         /*
262          * See if we're going to wrap-around. If so, handle the top
263          * of the descriptor ring first, then do the bottom.  If not,
264          * the processing looks just like the "bottom" part anyway...
265          */
266         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
267                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
268                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
269
270                 /*
271                  * We know that the last descriptor in the ring will need to
272                  * have its RS bit set because tx_rs_thresh has to be
273                  * a divisor of the ring size
274                  */
275                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
276                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
277                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
278
279                 txq->tx_tail = 0;
280         }
281
282         /* Fill H/W descriptor ring with mbuf data */
283         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
284         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
285
286         /*
287          * Determine if RS bit should be set
288          * This is what we actually want:
289          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
290          * but instead of subtracting 1 and doing >=, we can just do
291          * greater than without subtracting.
292          */
293         if (txq->tx_tail > txq->tx_next_rs) {
294                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
295                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
296                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
297                                                 txq->tx_rs_thresh);
298                 if (txq->tx_next_rs >= txq->nb_tx_desc)
299                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
300         }
301
302         /*
303          * Check for wrap-around. This would only happen if we used
304          * up to the last descriptor in the ring, no more, no less.
305          */
306         if (txq->tx_tail >= txq->nb_tx_desc)
307                 txq->tx_tail = 0;
308
309         /* update tail pointer */
310         rte_wmb();
311         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
312
313         return nb_pkts;
314 }
315
316 uint16_t
317 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
318                        uint16_t nb_pkts)
319 {
320         uint16_t nb_tx;
321
322         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
323         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
324                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
325
326         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
327         nb_tx = 0;
328         while (nb_pkts) {
329                 uint16_t ret, n;
330
331                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
332                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
333                 nb_tx = (uint16_t)(nb_tx + ret);
334                 nb_pkts = (uint16_t)(nb_pkts - ret);
335                 if (ret < n)
336                         break;
337         }
338
339         return nb_tx;
340 }
341
342 static uint16_t
343 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
344                     uint16_t nb_pkts)
345 {
346         uint16_t nb_tx = 0;
347         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
348
349         while (nb_pkts) {
350                 uint16_t ret, num;
351
352                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
353                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
354                                                  num);
355                 nb_tx += ret;
356                 nb_pkts -= ret;
357                 if (ret < num)
358                         break;
359         }
360
361         return nb_tx;
362 }
363
364 static inline void
365 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
366                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
367                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
368                 __rte_unused uint64_t *mdata)
369 {
370         uint32_t type_tucmd_mlhl;
371         uint32_t mss_l4len_idx = 0;
372         uint32_t ctx_idx;
373         uint32_t vlan_macip_lens;
374         union ixgbe_tx_offload tx_offload_mask;
375         uint32_t seqnum_seed = 0;
376
377         ctx_idx = txq->ctx_curr;
378         tx_offload_mask.data[0] = 0;
379         tx_offload_mask.data[1] = 0;
380         type_tucmd_mlhl = 0;
381
382         /* Specify which HW CTX to upload. */
383         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
384
385         if (ol_flags & PKT_TX_VLAN_PKT) {
386                 tx_offload_mask.vlan_tci |= ~0;
387         }
388
389         /* check if TCP segmentation required for this packet */
390         if (ol_flags & PKT_TX_TCP_SEG) {
391                 /* implies IP cksum in IPv4 */
392                 if (ol_flags & PKT_TX_IP_CKSUM)
393                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
394                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
395                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
396                 else
397                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
398                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
399                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
400
401                 tx_offload_mask.l2_len |= ~0;
402                 tx_offload_mask.l3_len |= ~0;
403                 tx_offload_mask.l4_len |= ~0;
404                 tx_offload_mask.tso_segsz |= ~0;
405                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
406                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
407         } else { /* no TSO, check if hardware checksum is needed */
408                 if (ol_flags & PKT_TX_IP_CKSUM) {
409                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                 }
413
414                 switch (ol_flags & PKT_TX_L4_MASK) {
415                 case PKT_TX_UDP_CKSUM:
416                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
417                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
419                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
427                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 case PKT_TX_SCTP_CKSUM:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
435                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
436                         tx_offload_mask.l2_len |= ~0;
437                         tx_offload_mask.l3_len |= ~0;
438                         break;
439                 default:
440                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
441                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
442                         break;
443                 }
444         }
445
446         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
447                 tx_offload_mask.outer_l2_len |= ~0;
448                 tx_offload_mask.outer_l3_len |= ~0;
449                 tx_offload_mask.l2_len |= ~0;
450                 seqnum_seed |= tx_offload.outer_l3_len
451                                << IXGBE_ADVTXD_OUTER_IPLEN;
452                 seqnum_seed |= tx_offload.l2_len
453                                << IXGBE_ADVTXD_TUNNEL_LEN;
454         }
455 #ifdef RTE_LIBRTE_SECURITY
456         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
457                 union ixgbe_crypto_tx_desc_md *md =
458                                 (union ixgbe_crypto_tx_desc_md *)mdata;
459                 seqnum_seed |=
460                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
461                 type_tucmd_mlhl |= md->enc ?
462                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
463                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
464                 type_tucmd_mlhl |=
465                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
466                 tx_offload_mask.sa_idx |= ~0;
467                 tx_offload_mask.sec_pad_len |= ~0;
468         }
469 #endif
470
471         txq->ctx_cache[ctx_idx].flags = ol_flags;
472         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
473                 tx_offload_mask.data[0] & tx_offload.data[0];
474         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
475                 tx_offload_mask.data[1] & tx_offload.data[1];
476         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
477
478         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
479         vlan_macip_lens = tx_offload.l3_len;
480         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
481                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
482                                     IXGBE_ADVTXD_MACLEN_SHIFT);
483         else
484                 vlan_macip_lens |= (tx_offload.l2_len <<
485                                     IXGBE_ADVTXD_MACLEN_SHIFT);
486         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
487         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
488         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
489         ctx_txd->seqnum_seed     = seqnum_seed;
490 }
491
492 /*
493  * Check which hardware context can be used. Use the existing match
494  * or create a new context descriptor.
495  */
496 static inline uint32_t
497 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
498                    union ixgbe_tx_offload tx_offload)
499 {
500         /* If match with the current used context */
501         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
502                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
503                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
504                      & tx_offload.data[0])) &&
505                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
506                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
507                      & tx_offload.data[1]))))
508                 return txq->ctx_curr;
509
510         /* What if match with the next context  */
511         txq->ctx_curr ^= 1;
512         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
513                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
514                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
515                      & tx_offload.data[0])) &&
516                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
517                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
518                      & tx_offload.data[1]))))
519                 return txq->ctx_curr;
520
521         /* Mismatch, use the previous context */
522         return IXGBE_CTX_NUM;
523 }
524
525 static inline uint32_t
526 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
527 {
528         uint32_t tmp = 0;
529
530         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
531                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
532         if (ol_flags & PKT_TX_IP_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
534         if (ol_flags & PKT_TX_TCP_SEG)
535                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
536         return tmp;
537 }
538
539 static inline uint32_t
540 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
541 {
542         uint32_t cmdtype = 0;
543
544         if (ol_flags & PKT_TX_VLAN_PKT)
545                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
548         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
549                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
550         if (ol_flags & PKT_TX_MACSEC)
551                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
552         return cmdtype;
553 }
554
555 /* Default RS bit threshold values */
556 #ifndef DEFAULT_TX_RS_THRESH
557 #define DEFAULT_TX_RS_THRESH   32
558 #endif
559 #ifndef DEFAULT_TX_FREE_THRESH
560 #define DEFAULT_TX_FREE_THRESH 32
561 #endif
562
563 /* Reset transmit descriptors after they have been used */
564 static inline int
565 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
566 {
567         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
568         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
569         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
570         uint16_t nb_tx_desc = txq->nb_tx_desc;
571         uint16_t desc_to_clean_to;
572         uint16_t nb_tx_to_clean;
573         uint32_t status;
574
575         /* Determine the last descriptor needing to be cleaned */
576         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
577         if (desc_to_clean_to >= nb_tx_desc)
578                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
579
580         /* Check to make sure the last descriptor to clean is done */
581         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
582         status = txr[desc_to_clean_to].wb.status;
583         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
584                 PMD_TX_FREE_LOG(DEBUG,
585                                 "TX descriptor %4u is not done"
586                                 "(port=%d queue=%d)",
587                                 desc_to_clean_to,
588                                 txq->port_id, txq->queue_id);
589                 /* Failed to clean any descriptors, better luck next time */
590                 return -(1);
591         }
592
593         /* Figure out how many descriptors will be cleaned */
594         if (last_desc_cleaned > desc_to_clean_to)
595                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
596                                                         desc_to_clean_to);
597         else
598                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
599                                                 last_desc_cleaned);
600
601         PMD_TX_FREE_LOG(DEBUG,
602                         "Cleaning %4u TX descriptors: %4u to %4u "
603                         "(port=%d queue=%d)",
604                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
605                         txq->port_id, txq->queue_id);
606
607         /*
608          * The last descriptor to clean is done, so that means all the
609          * descriptors from the last descriptor that was cleaned
610          * up to the last descriptor with the RS bit set
611          * are done. Only reset the threshold descriptor.
612          */
613         txr[desc_to_clean_to].wb.status = 0;
614
615         /* Update the txq to reflect the last descriptor that was cleaned */
616         txq->last_desc_cleaned = desc_to_clean_to;
617         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
618
619         /* No Error */
620         return 0;
621 }
622
623 uint16_t
624 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
625                 uint16_t nb_pkts)
626 {
627         struct ixgbe_tx_queue *txq;
628         struct ixgbe_tx_entry *sw_ring;
629         struct ixgbe_tx_entry *txe, *txn;
630         volatile union ixgbe_adv_tx_desc *txr;
631         volatile union ixgbe_adv_tx_desc *txd, *txp;
632         struct rte_mbuf     *tx_pkt;
633         struct rte_mbuf     *m_seg;
634         uint64_t buf_dma_addr;
635         uint32_t olinfo_status;
636         uint32_t cmd_type_len;
637         uint32_t pkt_len;
638         uint16_t slen;
639         uint64_t ol_flags;
640         uint16_t tx_id;
641         uint16_t tx_last;
642         uint16_t nb_tx;
643         uint16_t nb_used;
644         uint64_t tx_ol_req;
645         uint32_t ctx = 0;
646         uint32_t new_ctx;
647         union ixgbe_tx_offload tx_offload;
648 #ifdef RTE_LIBRTE_SECURITY
649         uint8_t use_ipsec;
650 #endif
651
652         tx_offload.data[0] = 0;
653         tx_offload.data[1] = 0;
654         txq = tx_queue;
655         sw_ring = txq->sw_ring;
656         txr     = txq->tx_ring;
657         tx_id   = txq->tx_tail;
658         txe = &sw_ring[tx_id];
659         txp = NULL;
660
661         /* Determine if the descriptor ring needs to be cleaned. */
662         if (txq->nb_tx_free < txq->tx_free_thresh)
663                 ixgbe_xmit_cleanup(txq);
664
665         rte_prefetch0(&txe->mbuf->pool);
666
667         /* TX loop */
668         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
669                 new_ctx = 0;
670                 tx_pkt = *tx_pkts++;
671                 pkt_len = tx_pkt->pkt_len;
672
673                 /*
674                  * Determine how many (if any) context descriptors
675                  * are needed for offload functionality.
676                  */
677                 ol_flags = tx_pkt->ol_flags;
678 #ifdef RTE_LIBRTE_SECURITY
679                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
680 #endif
681
682                 /* If hardware offload required */
683                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
684                 if (tx_ol_req) {
685                         tx_offload.l2_len = tx_pkt->l2_len;
686                         tx_offload.l3_len = tx_pkt->l3_len;
687                         tx_offload.l4_len = tx_pkt->l4_len;
688                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
689                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
690                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
691                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
692 #ifdef RTE_LIBRTE_SECURITY
693                         if (use_ipsec) {
694                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
695                                         (union ixgbe_crypto_tx_desc_md *)
696                                                         &tx_pkt->udata64;
697                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
698                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
699                         }
700 #endif
701
702                         /* If new context need be built or reuse the exist ctx. */
703                         ctx = what_advctx_update(txq, tx_ol_req,
704                                 tx_offload);
705                         /* Only allocate context descriptor if required*/
706                         new_ctx = (ctx == IXGBE_CTX_NUM);
707                         ctx = txq->ctx_curr;
708                 }
709
710                 /*
711                  * Keep track of how many descriptors are used this loop
712                  * This will always be the number of segments + the number of
713                  * Context descriptors required to transmit the packet
714                  */
715                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
716
717                 if (txp != NULL &&
718                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
719                         /* set RS on the previous packet in the burst */
720                         txp->read.cmd_type_len |=
721                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
722
723                 /*
724                  * The number of descriptors that must be allocated for a
725                  * packet is the number of segments of that packet, plus 1
726                  * Context Descriptor for the hardware offload, if any.
727                  * Determine the last TX descriptor to allocate in the TX ring
728                  * for the packet, starting from the current position (tx_id)
729                  * in the ring.
730                  */
731                 tx_last = (uint16_t) (tx_id + nb_used - 1);
732
733                 /* Circular ring */
734                 if (tx_last >= txq->nb_tx_desc)
735                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
736
737                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
738                            " tx_first=%u tx_last=%u",
739                            (unsigned) txq->port_id,
740                            (unsigned) txq->queue_id,
741                            (unsigned) pkt_len,
742                            (unsigned) tx_id,
743                            (unsigned) tx_last);
744
745                 /*
746                  * Make sure there are enough TX descriptors available to
747                  * transmit the entire packet.
748                  * nb_used better be less than or equal to txq->tx_rs_thresh
749                  */
750                 if (nb_used > txq->nb_tx_free) {
751                         PMD_TX_FREE_LOG(DEBUG,
752                                         "Not enough free TX descriptors "
753                                         "nb_used=%4u nb_free=%4u "
754                                         "(port=%d queue=%d)",
755                                         nb_used, txq->nb_tx_free,
756                                         txq->port_id, txq->queue_id);
757
758                         if (ixgbe_xmit_cleanup(txq) != 0) {
759                                 /* Could not clean any descriptors */
760                                 if (nb_tx == 0)
761                                         return 0;
762                                 goto end_of_tx;
763                         }
764
765                         /* nb_used better be <= txq->tx_rs_thresh */
766                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
767                                 PMD_TX_FREE_LOG(DEBUG,
768                                         "The number of descriptors needed to "
769                                         "transmit the packet exceeds the "
770                                         "RS bit threshold. This will impact "
771                                         "performance."
772                                         "nb_used=%4u nb_free=%4u "
773                                         "tx_rs_thresh=%4u. "
774                                         "(port=%d queue=%d)",
775                                         nb_used, txq->nb_tx_free,
776                                         txq->tx_rs_thresh,
777                                         txq->port_id, txq->queue_id);
778                                 /*
779                                  * Loop here until there are enough TX
780                                  * descriptors or until the ring cannot be
781                                  * cleaned.
782                                  */
783                                 while (nb_used > txq->nb_tx_free) {
784                                         if (ixgbe_xmit_cleanup(txq) != 0) {
785                                                 /*
786                                                  * Could not clean any
787                                                  * descriptors
788                                                  */
789                                                 if (nb_tx == 0)
790                                                         return 0;
791                                                 goto end_of_tx;
792                                         }
793                                 }
794                         }
795                 }
796
797                 /*
798                  * By now there are enough free TX descriptors to transmit
799                  * the packet.
800                  */
801
802                 /*
803                  * Set common flags of all TX Data Descriptors.
804                  *
805                  * The following bits must be set in all Data Descriptors:
806                  *   - IXGBE_ADVTXD_DTYP_DATA
807                  *   - IXGBE_ADVTXD_DCMD_DEXT
808                  *
809                  * The following bits must be set in the first Data Descriptor
810                  * and are ignored in the other ones:
811                  *   - IXGBE_ADVTXD_DCMD_IFCS
812                  *   - IXGBE_ADVTXD_MAC_1588
813                  *   - IXGBE_ADVTXD_DCMD_VLE
814                  *
815                  * The following bits must only be set in the last Data
816                  * Descriptor:
817                  *   - IXGBE_TXD_CMD_EOP
818                  *
819                  * The following bits can be set in any Data Descriptor, but
820                  * are only set in the last Data Descriptor:
821                  *   - IXGBE_TXD_CMD_RS
822                  */
823                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
824                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
825
826 #ifdef RTE_LIBRTE_IEEE1588
827                 if (ol_flags & PKT_TX_IEEE1588_TMST)
828                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
829 #endif
830
831                 olinfo_status = 0;
832                 if (tx_ol_req) {
833
834                         if (ol_flags & PKT_TX_TCP_SEG) {
835                                 /* when TSO is on, paylen in descriptor is the
836                                  * not the packet len but the tcp payload len */
837                                 pkt_len -= (tx_offload.l2_len +
838                                         tx_offload.l3_len + tx_offload.l4_len);
839                         }
840
841                         /*
842                          * Setup the TX Advanced Context Descriptor if required
843                          */
844                         if (new_ctx) {
845                                 volatile struct ixgbe_adv_tx_context_desc *
846                                     ctx_txd;
847
848                                 ctx_txd = (volatile struct
849                                     ixgbe_adv_tx_context_desc *)
850                                     &txr[tx_id];
851
852                                 txn = &sw_ring[txe->next_id];
853                                 rte_prefetch0(&txn->mbuf->pool);
854
855                                 if (txe->mbuf != NULL) {
856                                         rte_pktmbuf_free_seg(txe->mbuf);
857                                         txe->mbuf = NULL;
858                                 }
859
860                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
861                                         tx_offload, &tx_pkt->udata64);
862
863                                 txe->last_id = tx_last;
864                                 tx_id = txe->next_id;
865                                 txe = txn;
866                         }
867
868                         /*
869                          * Setup the TX Advanced Data Descriptor,
870                          * This path will go through
871                          * whatever new/reuse the context descriptor
872                          */
873                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
874                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
875                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
876                 }
877
878                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
879 #ifdef RTE_LIBRTE_SECURITY
880                 if (use_ipsec)
881                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
882 #endif
883
884                 m_seg = tx_pkt;
885                 do {
886                         txd = &txr[tx_id];
887                         txn = &sw_ring[txe->next_id];
888                         rte_prefetch0(&txn->mbuf->pool);
889
890                         if (txe->mbuf != NULL)
891                                 rte_pktmbuf_free_seg(txe->mbuf);
892                         txe->mbuf = m_seg;
893
894                         /*
895                          * Set up Transmit Data Descriptor.
896                          */
897                         slen = m_seg->data_len;
898                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
899                         txd->read.buffer_addr =
900                                 rte_cpu_to_le_64(buf_dma_addr);
901                         txd->read.cmd_type_len =
902                                 rte_cpu_to_le_32(cmd_type_len | slen);
903                         txd->read.olinfo_status =
904                                 rte_cpu_to_le_32(olinfo_status);
905                         txe->last_id = tx_last;
906                         tx_id = txe->next_id;
907                         txe = txn;
908                         m_seg = m_seg->next;
909                 } while (m_seg != NULL);
910
911                 /*
912                  * The last packet data descriptor needs End Of Packet (EOP)
913                  */
914                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
915                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
916                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917
918                 /* Set RS bit only on threshold packets' last descriptor */
919                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
920                         PMD_TX_FREE_LOG(DEBUG,
921                                         "Setting RS bit on TXD id="
922                                         "%4u (port=%d queue=%d)",
923                                         tx_last, txq->port_id, txq->queue_id);
924
925                         cmd_type_len |= IXGBE_TXD_CMD_RS;
926
927                         /* Update txq RS bit counters */
928                         txq->nb_tx_used = 0;
929                         txp = NULL;
930                 } else
931                         txp = txd;
932
933                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
934         }
935
936 end_of_tx:
937         /* set RS on last packet in the burst */
938         if (txp != NULL)
939                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
940
941         rte_wmb();
942
943         /*
944          * Set the Transmit Descriptor Tail (TDT)
945          */
946         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
947                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
948                    (unsigned) tx_id, (unsigned) nb_tx);
949         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
950         txq->tx_tail = tx_id;
951
952         return nb_tx;
953 }
954
955 /*********************************************************************
956  *
957  *  TX prep functions
958  *
959  **********************************************************************/
960 uint16_t
961 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
962 {
963         int i, ret;
964         uint64_t ol_flags;
965         struct rte_mbuf *m;
966         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
967
968         for (i = 0; i < nb_pkts; i++) {
969                 m = tx_pkts[i];
970                 ol_flags = m->ol_flags;
971
972                 /**
973                  * Check if packet meets requirements for number of segments
974                  *
975                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
976                  *       non-TSO
977                  */
978
979                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
980                         rte_errno = EINVAL;
981                         return i;
982                 }
983
984                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
985                         rte_errno = ENOTSUP;
986                         return i;
987                 }
988
989 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
990                 ret = rte_validate_tx_offload(m);
991                 if (ret != 0) {
992                         rte_errno = -ret;
993                         return i;
994                 }
995 #endif
996                 ret = rte_net_intel_cksum_prepare(m);
997                 if (ret != 0) {
998                         rte_errno = -ret;
999                         return i;
1000                 }
1001         }
1002
1003         return i;
1004 }
1005
1006 /*********************************************************************
1007  *
1008  *  RX functions
1009  *
1010  **********************************************************************/
1011
1012 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1013 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1014 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1015 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1016 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1017 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1018 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1019 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1020 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1021 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1022 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1023 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1024 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1025 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1026 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1027 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1028 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1029 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1030 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1033 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1034 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1045
1046 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1047 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1048 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1069
1070 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1071 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1072 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1093
1094 /**
1095  * Use 2 different table for normal packet and tunnel packet
1096  * to save the space.
1097  */
1098 const uint32_t
1099         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1100         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1101         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1102                 RTE_PTYPE_L3_IPV4,
1103         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1104                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1105         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1106                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1107         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1108                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1109         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4_EXT,
1111         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1113         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1115         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1117         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV6,
1119         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1121         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1123         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1125         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6_EXT,
1127         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1129         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1131         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1133         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1135                 RTE_PTYPE_INNER_L3_IPV6,
1136         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1138                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1139         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1141         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1145         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6,
1148         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1151         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1157         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1160         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1163         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1169         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1172         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1175         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1179                 RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1181                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1182 };
1183
1184 const uint32_t
1185         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1186         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1187                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1188                 RTE_PTYPE_INNER_L2_ETHER,
1189         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1190                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1191                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1192         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1193                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1195         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1210                 RTE_PTYPE_INNER_L4_TCP,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1214                 RTE_PTYPE_INNER_L4_TCP,
1215         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1216                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1217                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1218         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1219                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1220                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1221                 RTE_PTYPE_INNER_L4_TCP,
1222         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1223                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1224                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1225                 RTE_PTYPE_INNER_L3_IPV4,
1226         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1227                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1228                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1229                 RTE_PTYPE_INNER_L4_UDP,
1230         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1231                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1232                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1233                 RTE_PTYPE_INNER_L4_UDP,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1237                 RTE_PTYPE_INNER_L4_SCTP,
1238         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1239                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1240                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1241         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1243                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1244                 RTE_PTYPE_INNER_L4_UDP,
1245         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1248                 RTE_PTYPE_INNER_L4_SCTP,
1249         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1250                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1252                 RTE_PTYPE_INNER_L3_IPV4,
1253         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1255                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1256                 RTE_PTYPE_INNER_L4_SCTP,
1257         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1259                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1260                 RTE_PTYPE_INNER_L4_SCTP,
1261         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1263                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1264                 RTE_PTYPE_INNER_L4_TCP,
1265         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1267                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1268                 RTE_PTYPE_INNER_L4_UDP,
1269
1270         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1272                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1273         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1274                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                 RTE_PTYPE_INNER_L3_IPV4,
1277         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1278                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1281         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1282                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                 RTE_PTYPE_INNER_L3_IPV6,
1285         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1286                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1287                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1288                 RTE_PTYPE_INNER_L3_IPV4,
1289         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1290                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1293         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1294                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                 RTE_PTYPE_INNER_L3_IPV4,
1297         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1298                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1301         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1302                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1305         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1306                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                 RTE_PTYPE_INNER_L3_IPV4,
1309         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1310                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1313         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1314                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1318                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1321         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1322                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1325         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1326                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1329         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1330                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                 RTE_PTYPE_INNER_L3_IPV4,
1333         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1334                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1335                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1336                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1337         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1338                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1339                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1340                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1341         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1342                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1343                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1344                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1345         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1346                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1347                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1348                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1349         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1350                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1351                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1352                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1353         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1354                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1355                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1356                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1357         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1358                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1359                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1360                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1361 };
1362
1363 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1364 static inline uint32_t
1365 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1366 {
1367
1368         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1369                 return RTE_PTYPE_UNKNOWN;
1370
1371         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1372
1373         /* For tunnel packets */
1374         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1375                 /* Remove the tunnel bit to save table space. */
1376                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1377                 return ptype_table_tn[pkt_info];
1378         }
1379
1380         /**
1381          * For x550, if the packet is not tunnelled,
1382          * the tunnel type bits should be 0.
1383          * Reuse the 82599 mask.
1384          */
1385         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1386
1387         return ptype_table[pkt_info];
1388 }
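
/*
 * Illustrative sketch, not part of the driver: a self-contained version of
 * the two-level lookup done by ixgbe_rxd_pkt_info_to_pkt_type() above.
 * The shift, masks and the tiny tables below are placeholders chosen only
 * to show the control flow; the real values are IXGBE_PACKET_TYPE_SHIFT,
 * IXGBE_PACKET_TYPE_TUNNEL_BIT, the IXGBE_PACKET_TYPE_MASK constants and
 * the ptype_table[]/ptype_table_tn[] arrays.  The sketch_ prefix marks
 * hypothetical names.
 */
static inline uint32_t
sketch_pkt_info_to_ptype(uint32_t pkt_info, uint32_t ptype_mask)
{
        /* placeholder stand-ins for the base-code constants */
        const uint32_t shift = 4;
        const uint32_t tunnel_bit = 0x08;
        const uint32_t tunnel_mask = 0x07;
        /* tiny stand-ins for ptype_table[] / ptype_table_tn[] */
        static const uint32_t plain_tbl[16] = { [1] = RTE_PTYPE_L3_IPV4 };
        static const uint32_t tunnel_tbl[8] = { [1] = RTE_PTYPE_TUNNEL_GRE };

        /* 1) isolate the packet-type field reported by the hardware */
        pkt_info = (pkt_info >> shift) & ptype_mask;

        /* 2) tunnelled packets index a dedicated, smaller table */
        if (pkt_info & tunnel_bit)
                return tunnel_tbl[pkt_info & tunnel_mask];

        /* 3) everything else indexes the plain table */
        return plain_tbl[pkt_info & 0x0F];
}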
1389
1390 static inline uint64_t
1391 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1392 {
1393         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1394                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1395                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1396                 PKT_RX_RSS_HASH, 0, 0, 0,
1397                 0, 0, 0,  PKT_RX_FDIR,
1398         };
1399 #ifdef RTE_LIBRTE_IEEE1588
1400         static uint64_t ip_pkt_etqf_map[8] = {
1401                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1402                 0, 0, 0, 0,
1403         };
1404
1405         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1406                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1407                                 ip_rss_types_map[pkt_info & 0xF];
1408         else
1409                 return ip_rss_types_map[pkt_info & 0xF];
1410 #else
1411         return ip_rss_types_map[pkt_info & 0xF];
1412 #endif
1413 }
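
/*
 * Illustrative sketch, not part of the driver: how an application might
 * consume the flags produced above.  PKT_RX_RSS_HASH and PKT_RX_FDIR are
 * derived from the same 4-bit RSS-type field, so at most one of the two
 * hash interpretations is valid per packet.  The helper name is
 * hypothetical.
 */
static inline void
sketch_print_rx_hash(const struct rte_mbuf *m)
{
        if (m->ol_flags & PKT_RX_RSS_HASH)
                printf("RSS hash: 0x%" PRIx32 "\n", m->hash.rss);
        else if (m->ol_flags & PKT_RX_FDIR)
                printf("FDIR id: %" PRIu16 "\n", m->hash.fdir.id);
}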
1414
1415 static inline uint64_t
1416 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1417 {
1418         uint64_t pkt_flags;
1419
1420         /*
1421          * Check only whether a VLAN is present.
1422          * Do not check whether the L3/L4 rx checksum was done by the NIC;
1423          * that can be found from the rte_eth_rxmode.offloads flags.
1424          */
1425         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1426
1427 #ifdef RTE_LIBRTE_IEEE1588
1428         if (rx_status & IXGBE_RXD_STAT_TMST)
1429                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1430 #endif
1431         return pkt_flags;
1432 }
1433
1434 static inline uint64_t
1435 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1436 {
1437         uint64_t pkt_flags;
1438
1439         /*
1440          * Bit 31: IPE, IPv4 checksum error
1441          * Bit 30: L4I, L4 integrity error
1442          */
1443         static uint64_t error_to_pkt_flags_map[4] = {
1444                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1445                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1446                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1447                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1448         };
1449         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1450                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1451
1452         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1453             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1454                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1455         }
1456
1457 #ifdef RTE_LIBRTE_SECURITY
1458         if (rx_status & IXGBE_RXD_STAT_SECP) {
1459                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1460                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1461                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1462         }
1463 #endif
1464
1465         return pkt_flags;
1466 }
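
/*
 * Illustrative sketch, not part of the driver: a caller-side check built
 * only from the flags set above.  The 2-bit IPE/L4E field always yields a
 * GOOD or BAD flag for both layers, so a packet can be accepted when none
 * of the BAD flags (including the outer-IP one) is present.  The helper
 * name is hypothetical.
 */
static inline int
sketch_rx_cksum_ok(uint64_t ol_flags)
{
        return !(ol_flags & (PKT_RX_IP_CKSUM_BAD |
                             PKT_RX_L4_CKSUM_BAD |
                             PKT_RX_EIP_CKSUM_BAD));
}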
1467
1468 /*
1469  * LOOK_AHEAD defines how many desc statuses to check beyond the
1470  * current descriptor.
1471  * It must be a compile-time #define for optimal performance.
1472  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1473  * function only works with LOOK_AHEAD=8.
1474  */
1475 #define LOOK_AHEAD 8
1476 #if (LOOK_AHEAD != 8)
1477 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1478 #endif
1479 static inline int
1480 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1481 {
1482         volatile union ixgbe_adv_rx_desc *rxdp;
1483         struct ixgbe_rx_entry *rxep;
1484         struct rte_mbuf *mb;
1485         uint16_t pkt_len;
1486         uint64_t pkt_flags;
1487         int nb_dd;
1488         uint32_t s[LOOK_AHEAD];
1489         uint32_t pkt_info[LOOK_AHEAD];
1490         int i, j, nb_rx = 0;
1491         uint32_t status;
1492         uint64_t vlan_flags = rxq->vlan_flags;
1493
1494         /* get references to current descriptor and S/W ring entry */
1495         rxdp = &rxq->rx_ring[rxq->rx_tail];
1496         rxep = &rxq->sw_ring[rxq->rx_tail];
1497
1498         status = rxdp->wb.upper.status_error;
1499         /* check to make sure there is at least 1 packet to receive */
1500         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1501                 return 0;
1502
1503         /*
1504          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1505          * reference packets that are ready to be received.
1506          */
1507         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1508              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1509                 /* Read desc statuses backwards to avoid race condition */
1510                 for (j = 0; j < LOOK_AHEAD; j++)
1511                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1512
1513                 rte_smp_rmb();
1514
1515                 /* Compute how many status bits were set */
1516                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1517                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1518                         ;
1519
1520                 for (j = 0; j < nb_dd; j++)
1521                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1522                                                        lo_dword.data);
1523
1524                 nb_rx += nb_dd;
1525
1526                 /* Translate descriptor info to mbuf format */
1527                 for (j = 0; j < nb_dd; ++j) {
1528                         mb = rxep[j].mbuf;
1529                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1530                                   rxq->crc_len;
1531                         mb->data_len = pkt_len;
1532                         mb->pkt_len = pkt_len;
1533                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1534
1535                         /* convert descriptor fields to rte mbuf flags */
1536                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1537                                 vlan_flags);
1538                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1539                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1540                                         ((uint16_t)pkt_info[j]);
1541                         mb->ol_flags = pkt_flags;
1542                         mb->packet_type =
1543                                 ixgbe_rxd_pkt_info_to_pkt_type
1544                                         (pkt_info[j], rxq->pkt_type_mask);
1545
1546                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1547                                 mb->hash.rss = rte_le_to_cpu_32(
1548                                     rxdp[j].wb.lower.hi_dword.rss);
1549                         else if (pkt_flags & PKT_RX_FDIR) {
1550                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1551                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1552                                     IXGBE_ATR_HASH_MASK;
1553                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1554                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1555                         }
1556                 }
1557
1558                 /* Move mbuf pointers from the S/W ring to the stage */
1559                 for (j = 0; j < LOOK_AHEAD; ++j) {
1560                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1561                 }
1562
1563                 /* stop if all requested packets could not be received */
1564                 if (nb_dd != LOOK_AHEAD)
1565                         break;
1566         }
1567
1568         /* clear software ring entries so we can cleanup correctly */
1569         for (i = 0; i < nb_rx; ++i) {
1570                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1571         }
1572
1573
1574         return nb_rx;
1575 }
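
/*
 * Illustrative sketch, not part of the driver: the core of the look-ahead
 * scan above.  Given a snapshot of LOOK_AHEAD status words, count how many
 * consecutive descriptors starting at the window head have the DD bit set;
 * a single not-done descriptor stops the count even if later ones are done.
 * The function and parameter names are hypothetical; dd_bit stands for
 * IXGBE_RXDADV_STAT_DD.
 */
static inline int
sketch_count_done_descs(const uint32_t *status, int window, uint32_t dd_bit)
{
        int nb_dd;

        for (nb_dd = 0; nb_dd < window && (status[nb_dd] & dd_bit); nb_dd++)
                ;
        return nb_dd;
}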
1576
1577 static inline int
1578 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1579 {
1580         volatile union ixgbe_adv_rx_desc *rxdp;
1581         struct ixgbe_rx_entry *rxep;
1582         struct rte_mbuf *mb;
1583         uint16_t alloc_idx;
1584         __le64 dma_addr;
1585         int diag, i;
1586
1587         /* allocate buffers in bulk directly into the S/W ring */
1588         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1589         rxep = &rxq->sw_ring[alloc_idx];
1590         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1591                                     rxq->rx_free_thresh);
1592         if (unlikely(diag != 0))
1593                 return -ENOMEM;
1594
1595         rxdp = &rxq->rx_ring[alloc_idx];
1596         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1597                 /* populate the static rte mbuf fields */
1598                 mb = rxep[i].mbuf;
1599                 if (reset_mbuf) {
1600                         mb->port = rxq->port_id;
1601                 }
1602
1603                 rte_mbuf_refcnt_set(mb, 1);
1604                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1605
1606                 /* populate the descriptors */
1607                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1608                 rxdp[i].read.hdr_addr = 0;
1609                 rxdp[i].read.pkt_addr = dma_addr;
1610         }
1611
1612         /* update state of internal queue structure */
1613         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1614         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1615                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1616
1617         /* no errors */
1618         return 0;
1619 }
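
/*
 * Illustrative sketch, not part of the driver: how the refill trigger used
 * above advances.  Buffers are replenished in rx_free_thresh-sized chunks,
 * so the trigger moves forward one chunk at a time and wraps back to the
 * end of the first chunk (rx_free_thresh - 1) once it runs past the ring
 * size.  Names are hypothetical; the driver keeps these values in
 * ixgbe_rx_queue.
 */
static inline uint16_t
sketch_advance_free_trigger(uint16_t trigger, uint16_t free_thresh,
                            uint16_t nb_desc)
{
        trigger = (uint16_t)(trigger + free_thresh);
        if (trigger >= nb_desc)
                trigger = (uint16_t)(free_thresh - 1);
        return trigger;
}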
1620
1621 static inline uint16_t
1622 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1623                          uint16_t nb_pkts)
1624 {
1625         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1626         int i;
1627
1628         /* how many packets are ready to return? */
1629         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1630
1631         /* copy mbuf pointers to the application's packet list */
1632         for (i = 0; i < nb_pkts; ++i)
1633                 rx_pkts[i] = stage[i];
1634
1635         /* update internal queue state */
1636         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1637         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1638
1639         return nb_pkts;
1640 }
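
/*
 * Illustrative sketch, not part of the driver: the stage array above acts
 * as a small FIFO window, with rx_next_avail as the read index and
 * rx_nb_avail as the number of staged mbufs not yet handed out.  A
 * self-contained drain of such a window looks like this (all names
 * hypothetical):
 */
static inline uint16_t
sketch_drain_stage(struct rte_mbuf **stage, uint16_t *next, uint16_t *avail,
                   struct rte_mbuf **out, uint16_t want)
{
        uint16_t n = (uint16_t)RTE_MIN(want, *avail);
        uint16_t i;

        for (i = 0; i < n; i++)
                out[i] = stage[*next + i];
        *next = (uint16_t)(*next + n);
        *avail = (uint16_t)(*avail - n);
        return n;
}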
1641
1642 static inline uint16_t
1643 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1644              uint16_t nb_pkts)
1645 {
1646         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1647         uint16_t nb_rx = 0;
1648
1649         /* Any previously recv'd pkts will be returned from the Rx stage */
1650         if (rxq->rx_nb_avail)
1651                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1652
1653         /* Scan the H/W ring for packets to receive */
1654         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1655
1656         /* update internal queue state */
1657         rxq->rx_next_avail = 0;
1658         rxq->rx_nb_avail = nb_rx;
1659         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1660
1661         /* if required, allocate new buffers to replenish descriptors */
1662         if (rxq->rx_tail > rxq->rx_free_trigger) {
1663                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1664
1665                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1666                         int i, j;
1667
1668                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1669                                    "queue_id=%u", (unsigned) rxq->port_id,
1670                                    (unsigned) rxq->queue_id);
1671
1672                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1673                                 rxq->rx_free_thresh;
1674
1675                         /*
1676                          * Need to rewind any previous receives if we cannot
1677                          * allocate new buffers to replenish the old ones.
1678                          */
1679                         rxq->rx_nb_avail = 0;
1680                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1681                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1682                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1683
1684                         return 0;
1685                 }
1686
1687                 /* update tail pointer */
1688                 rte_wmb();
1689                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1690                                             cur_free_trigger);
1691         }
1692
1693         if (rxq->rx_tail >= rxq->nb_rx_desc)
1694                 rxq->rx_tail = 0;
1695
1696         /* received any packets this loop? */
1697         if (rxq->rx_nb_avail)
1698                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1699
1700         return 0;
1701 }
1702
1703 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1704 uint16_t
1705 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1706                            uint16_t nb_pkts)
1707 {
1708         uint16_t nb_rx;
1709
1710         if (unlikely(nb_pkts == 0))
1711                 return 0;
1712
1713         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1714                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1715
1716         /* request is relatively large, chunk it up */
1717         nb_rx = 0;
1718         while (nb_pkts) {
1719                 uint16_t ret, n;
1720
1721                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1722                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1723                 nb_rx = (uint16_t)(nb_rx + ret);
1724                 nb_pkts = (uint16_t)(nb_pkts - ret);
1725                 if (ret < n)
1726                         break;
1727         }
1728
1729         return nb_rx;
1730 }
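
/*
 * Illustrative sketch, not part of the driver: the chunking pattern used
 * above as a standalone helper.  A large request is served in bounded
 * bursts and the loop stops early as soon as one burst comes back short,
 * since that means no more packets are currently available.  rx_fn stands
 * for any function with the rx_recv_pkts() signature; all names here are
 * hypothetical.
 */
static inline uint16_t
sketch_chunked_recv(uint16_t (*rx_fn)(void *, struct rte_mbuf **, uint16_t),
                    void *q, struct rte_mbuf **pkts, uint16_t nb_pkts,
                    uint16_t max_burst)
{
        uint16_t nb_rx = 0;

        while (nb_pkts) {
                uint16_t n = (uint16_t)RTE_MIN(nb_pkts, max_burst);
                uint16_t ret = rx_fn(q, &pkts[nb_rx], n);

                nb_rx = (uint16_t)(nb_rx + ret);
                nb_pkts = (uint16_t)(nb_pkts - ret);
                if (ret < n)
                        break;
        }
        return nb_rx;
}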
1731
1732 uint16_t
1733 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1734                 uint16_t nb_pkts)
1735 {
1736         struct ixgbe_rx_queue *rxq;
1737         volatile union ixgbe_adv_rx_desc *rx_ring;
1738         volatile union ixgbe_adv_rx_desc *rxdp;
1739         struct ixgbe_rx_entry *sw_ring;
1740         struct ixgbe_rx_entry *rxe;
1741         struct rte_mbuf *rxm;
1742         struct rte_mbuf *nmb;
1743         union ixgbe_adv_rx_desc rxd;
1744         uint64_t dma_addr;
1745         uint32_t staterr;
1746         uint32_t pkt_info;
1747         uint16_t pkt_len;
1748         uint16_t rx_id;
1749         uint16_t nb_rx;
1750         uint16_t nb_hold;
1751         uint64_t pkt_flags;
1752         uint64_t vlan_flags;
1753
1754         nb_rx = 0;
1755         nb_hold = 0;
1756         rxq = rx_queue;
1757         rx_id = rxq->rx_tail;
1758         rx_ring = rxq->rx_ring;
1759         sw_ring = rxq->sw_ring;
1760         vlan_flags = rxq->vlan_flags;
1761         while (nb_rx < nb_pkts) {
1762                 /*
1763                  * The order of operations here is important as the DD status
1764                  * bit must not be read after any other descriptor fields.
1765                  * rx_ring and rxdp are pointing to volatile data so the order
1766                  * of accesses cannot be reordered by the compiler. If they were
1767                  * not volatile, they could be reordered which could lead to
1768                  * using invalid descriptor fields when read from rxd.
1769                  */
1770                 rxdp = &rx_ring[rx_id];
1771                 staterr = rxdp->wb.upper.status_error;
1772                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1773                         break;
1774                 rxd = *rxdp;
1775
1776                 /*
1777                  * End of packet.
1778                  *
1779                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1780                  * is likely to be invalid and to be dropped by the various
1781                  * validation checks performed by the network stack.
1782                  *
1783                  * Allocate a new mbuf to replenish the RX ring descriptor.
1784                  * If the allocation fails:
1785                  *    - arrange for that RX descriptor to be the first one
1786                  *      being parsed the next time the receive function is
1787                  *      invoked [on the same queue].
1788                  *
1789                  *    - Stop parsing the RX ring and return immediately.
1790                  *
1791                  * This policy does not drop the packet received in the RX
1792                  * descriptor for which the allocation of a new mbuf failed.
1793                  * Thus, it allows that packet to be retrieved later if
1794                  * mbufs have been freed in the meantime.
1795                  * As a side effect, holding RX descriptors instead of
1796                  * systematically giving them back to the NIC may lead to
1797                  * RX ring exhaustion situations.
1798                  * However, the NIC can gracefully prevent such situations
1799                  * from happening by sending specific "back-pressure" flow
1800                  * control frames to its peer(s).
1801                  */
1802                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1803                            "ext_err_stat=0x%08x pkt_len=%u",
1804                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1805                            (unsigned) rx_id, (unsigned) staterr,
1806                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1807
1808                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1809                 if (nmb == NULL) {
1810                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1811                                    "queue_id=%u", (unsigned) rxq->port_id,
1812                                    (unsigned) rxq->queue_id);
1813                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1814                         break;
1815                 }
1816
1817                 nb_hold++;
1818                 rxe = &sw_ring[rx_id];
1819                 rx_id++;
1820                 if (rx_id == rxq->nb_rx_desc)
1821                         rx_id = 0;
1822
1823                 /* Prefetch next mbuf while processing current one. */
1824                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1825
1826                 /*
1827                  * When next RX descriptor is on a cache-line boundary,
1828                  * prefetch the next 4 RX descriptors and the next 8 pointers
1829                  * to mbufs.
1830                  */
1831                 if ((rx_id & 0x3) == 0) {
1832                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1833                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1834                 }
1835
1836                 rxm = rxe->mbuf;
1837                 rxe->mbuf = nmb;
1838                 dma_addr =
1839                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1840                 rxdp->read.hdr_addr = 0;
1841                 rxdp->read.pkt_addr = dma_addr;
1842
1843                 /*
1844                  * Initialize the returned mbuf.
1845                  * 1) setup generic mbuf fields:
1846                  *    - number of segments,
1847                  *    - next segment,
1848                  *    - packet length,
1849                  *    - RX port identifier.
1850                  * 2) integrate hardware offload data, if any:
1851                  *    - RSS flag & hash,
1852                  *    - IP checksum flag,
1853                  *    - VLAN TCI, if any,
1854                  *    - error flags.
1855                  */
1856                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1857                                       rxq->crc_len);
1858                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1859                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1860                 rxm->nb_segs = 1;
1861                 rxm->next = NULL;
1862                 rxm->pkt_len = pkt_len;
1863                 rxm->data_len = pkt_len;
1864                 rxm->port = rxq->port_id;
1865
1866                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1867                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1868                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1869
1870                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1871                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1872                 pkt_flags = pkt_flags |
1873                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1874                 rxm->ol_flags = pkt_flags;
1875                 rxm->packet_type =
1876                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1877                                                        rxq->pkt_type_mask);
1878
1879                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1880                         rxm->hash.rss = rte_le_to_cpu_32(
1881                                                 rxd.wb.lower.hi_dword.rss);
1882                 else if (pkt_flags & PKT_RX_FDIR) {
1883                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1884                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1885                                         IXGBE_ATR_HASH_MASK;
1886                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1887                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1888                 }
1889                 /*
1890                  * Store the mbuf address into the next entry of the array
1891                  * of returned packets.
1892                  */
1893                 rx_pkts[nb_rx++] = rxm;
1894         }
1895         rxq->rx_tail = rx_id;
1896
1897         /*
1898          * If the number of free RX descriptors is greater than the RX free
1899          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1900          * register.
1901          * Update the RDT with the value of the last processed RX descriptor
1902          * minus 1, to guarantee that the RDT register is never equal to the
1903          * RDH register, which creates a "full" ring situation from the
1904          * hardware point of view...
1905          */
1906         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1907         if (nb_hold > rxq->rx_free_thresh) {
1908                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1909                            "nb_hold=%u nb_rx=%u",
1910                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1911                            (unsigned) rx_id, (unsigned) nb_hold,
1912                            (unsigned) nb_rx);
1913                 rx_id = (uint16_t) ((rx_id == 0) ?
1914                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1915                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1916                 nb_hold = 0;
1917         }
1918         rxq->nb_rx_hold = nb_hold;
1919         return nb_rx;
1920 }
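
/*
 * Illustrative sketch, not part of the driver: the tail-update rule used
 * above, isolated.  The value written to RDT is one descriptor behind the
 * next position software will process, so RDT can never become equal to
 * RDH and the hardware never sees a spuriously "full" ring.  Names are
 * hypothetical.
 */
static inline uint16_t
sketch_rdt_value(uint16_t next_sw_desc, uint16_t nb_desc)
{
        return (uint16_t)((next_sw_desc == 0) ?
                          (nb_desc - 1) : (next_sw_desc - 1));
}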
1921
1922 /**
1923  * Detect an RSC descriptor.
1924  */
1925 static inline uint32_t
1926 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1927 {
1928         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1929                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1930 }
1931
1932 /**
1933  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1934  *
1935  * Fill the following info in the HEAD buffer of the Rx cluster:
1936  *    - RX port identifier
1937  *    - hardware offload data, if any:
1938  *      - RSS flag & hash
1939  *      - IP checksum flag
1940  *      - VLAN TCI, if any
1941  *      - error flags
1942  * @head HEAD of the packet cluster
1943  * @desc HW descriptor to get data from
1944  * @rxq Pointer to the Rx queue
1945  */
1946 static inline void
1947 ixgbe_fill_cluster_head_buf(
1948         struct rte_mbuf *head,
1949         union ixgbe_adv_rx_desc *desc,
1950         struct ixgbe_rx_queue *rxq,
1951         uint32_t staterr)
1952 {
1953         uint32_t pkt_info;
1954         uint64_t pkt_flags;
1955
1956         head->port = rxq->port_id;
1957
1958         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1959          * set in the pkt_flags field.
1960          */
1961         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1962         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1963         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1964         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1965         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1966         head->ol_flags = pkt_flags;
1967         head->packet_type =
1968                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1969
1970         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1971                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1972         else if (pkt_flags & PKT_RX_FDIR) {
1973                 head->hash.fdir.hash =
1974                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1975                                                           & IXGBE_ATR_HASH_MASK;
1976                 head->hash.fdir.id =
1977                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1978         }
1979 }
1980
1981 /**
1982  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1983  *
1984  * @rx_queue Rx queue handle
1985  * @rx_pkts table of received packets
1986  * @nb_pkts size of rx_pkts table
1987  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1988  *
1989  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1990  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1991  *
1992  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1993  * 1) When non-EOP RSC completion arrives:
1994  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1995  *       segment's data length.
1996  *    b) Set the "next" pointer of the current segment to point to the segment
1997  *       at the NEXTP index.
1998  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1999  *       in the sw_rsc_ring.
2000  * 2) When EOP arrives we just update the cluster's total length and offload
2001  *    flags and deliver the cluster up to the upper layers. In our case - put it
2002  *    in the rx_pkts table.
2003  *
2004  * Returns the number of received packets/clusters (according to the "bulk
2005  * receive" interface).
2006  */
2007 static inline uint16_t
2008 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2009                     bool bulk_alloc)
2010 {
2011         struct ixgbe_rx_queue *rxq = rx_queue;
2012         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2013         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2014         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2015         uint16_t rx_id = rxq->rx_tail;
2016         uint16_t nb_rx = 0;
2017         uint16_t nb_hold = rxq->nb_rx_hold;
2018         uint16_t prev_id = rxq->rx_tail;
2019
2020         while (nb_rx < nb_pkts) {
2021                 bool eop;
2022                 struct ixgbe_rx_entry *rxe;
2023                 struct ixgbe_scattered_rx_entry *sc_entry;
2024                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2025                 struct ixgbe_rx_entry *next_rxe = NULL;
2026                 struct rte_mbuf *first_seg;
2027                 struct rte_mbuf *rxm;
2028                 struct rte_mbuf *nmb = NULL;
2029                 union ixgbe_adv_rx_desc rxd;
2030                 uint16_t data_len;
2031                 uint16_t next_id;
2032                 volatile union ixgbe_adv_rx_desc *rxdp;
2033                 uint32_t staterr;
2034
2035 next_desc:
2036                 /*
2037                  * The code in this whole file uses the volatile pointer to
2038                  * ensure the read ordering of the status and the rest of the
2039                  * descriptor fields (on the compiler level only!!!). This is so
2040                  * UGLY - why not just use the compiler barrier instead? DPDK
2041                  * even has the rte_compiler_barrier() for that.
2042                  *
2043                  * But most importantly this is just wrong because this doesn't
2044                  * ensure memory ordering in a general case at all. For
2045                  * instance, DPDK is supposed to work on Power CPUs where
2046                  * compiler barrier may just not be enough!
2047                  *
2048                  * I tried to write only this function properly to have a
2049                  * starting point (as a part of an LRO/RSC series) but the
2050                  * compiler cursed at me when I tried to cast away the
2051                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2052                  * keeping it the way it is for now.
2053                  *
2054                  * The code in this file is broken in so many other places and
2055                  * will just not work on a big endian CPU anyway therefore the
2056                  * lines below will have to be revisited together with the rest
2057                  * of the ixgbe PMD.
2058                  *
2059                  * TODO:
2060                  *    - Get rid of "volatile" and let the compiler do its job.
2061                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2062                  *      memory ordering below.
2063                  */
2064                 rxdp = &rx_ring[rx_id];
2065                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2066
2067                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2068                         break;
2069
2070                 rxd = *rxdp;
2071
2072                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2073                                   "staterr=0x%x data_len=%u",
2074                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2075                            rte_le_to_cpu_16(rxd.wb.upper.length));
2076
2077                 if (!bulk_alloc) {
2078                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2079                         if (nmb == NULL) {
2080                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2081                                                   "port_id=%u queue_id=%u",
2082                                            rxq->port_id, rxq->queue_id);
2083
2084                                 rte_eth_devices[rxq->port_id].data->
2085                                                         rx_mbuf_alloc_failed++;
2086                                 break;
2087                         }
2088                 } else if (nb_hold > rxq->rx_free_thresh) {
2089                         uint16_t next_rdt = rxq->rx_free_trigger;
2090
2091                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2092                                 rte_wmb();
2093                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2094                                                             next_rdt);
2095                                 nb_hold -= rxq->rx_free_thresh;
2096                         } else {
2097                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2098                                                   "port_id=%u queue_id=%u",
2099                                            rxq->port_id, rxq->queue_id);
2100
2101                                 rte_eth_devices[rxq->port_id].data->
2102                                                         rx_mbuf_alloc_failed++;
2103                                 break;
2104                         }
2105                 }
2106
2107                 nb_hold++;
2108                 rxe = &sw_ring[rx_id];
2109                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2110
2111                 next_id = rx_id + 1;
2112                 if (next_id == rxq->nb_rx_desc)
2113                         next_id = 0;
2114
2115                 /* Prefetch next mbuf while processing current one. */
2116                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2117
2118                 /*
2119                  * When next RX descriptor is on a cache-line boundary,
2120                  * prefetch the next 4 RX descriptors and the next 4 pointers
2121                  * to mbufs.
2122                  */
2123                 if ((next_id & 0x3) == 0) {
2124                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2125                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2126                 }
2127
2128                 rxm = rxe->mbuf;
2129
2130                 if (!bulk_alloc) {
2131                         __le64 dma =
2132                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2133                         /*
2134                          * Update RX descriptor with the physical address of the
2135                          * new data buffer of the new allocated mbuf.
2136                          */
2137                         rxe->mbuf = nmb;
2138
2139                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2140                         rxdp->read.hdr_addr = 0;
2141                         rxdp->read.pkt_addr = dma;
2142                 } else
2143                         rxe->mbuf = NULL;
2144
2145                 /*
2146                  * Set data length & data buffer address of mbuf.
2147                  */
2148                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2149                 rxm->data_len = data_len;
2150
2151                 if (!eop) {
2152                         uint16_t nextp_id;
2153                         /*
2154                          * Get next descriptor index:
2155                          *  - For RSC it's in the NEXTP field.
2156                          *  - For a scattered packet - it's just a following
2157                          *    descriptor.
2158                          */
2159                         if (ixgbe_rsc_count(&rxd))
2160                                 nextp_id =
2161                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2162                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2163                         else
2164                                 nextp_id = next_id;
2165
2166                         next_sc_entry = &sw_sc_ring[nextp_id];
2167                         next_rxe = &sw_ring[nextp_id];
2168                         rte_ixgbe_prefetch(next_rxe);
2169                 }
2170
2171                 sc_entry = &sw_sc_ring[rx_id];
2172                 first_seg = sc_entry->fbuf;
2173                 sc_entry->fbuf = NULL;
2174
2175                 /*
2176                  * If this is the first buffer of the received packet,
2177                  * set the pointer to the first mbuf of the packet and
2178                  * initialize its context.
2179                  * Otherwise, update the total length and the number of segments
2180                  * of the current scattered packet, and update the pointer to
2181                  * the last mbuf of the current packet.
2182                  */
2183                 if (first_seg == NULL) {
2184                         first_seg = rxm;
2185                         first_seg->pkt_len = data_len;
2186                         first_seg->nb_segs = 1;
2187                 } else {
2188                         first_seg->pkt_len += data_len;
2189                         first_seg->nb_segs++;
2190                 }
2191
2192                 prev_id = rx_id;
2193                 rx_id = next_id;
2194
2195                 /*
2196                  * If this is not the last buffer of the received packet, update
2197                  * the pointer to the first mbuf at the NEXTP entry in the
2198                  * sw_sc_ring and continue to parse the RX ring.
2199                  */
2200                 if (!eop && next_rxe) {
2201                         rxm->next = next_rxe->mbuf;
2202                         next_sc_entry->fbuf = first_seg;
2203                         goto next_desc;
2204                 }
2205
2206                 /* Initialize the first mbuf of the returned packet */
2207                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2208
2209                 /*
2210                  * Deal with the case when HW CRC stripping is disabled.
2211                  * That can't happen when LRO is enabled, but it could
2212                  * still happen in scattered RX mode.
2213                  */
2214                 first_seg->pkt_len -= rxq->crc_len;
2215                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2216                         struct rte_mbuf *lp;
2217
2218                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2219                                 ;
2220
2221                         first_seg->nb_segs--;
2222                         lp->data_len -= rxq->crc_len - rxm->data_len;
2223                         lp->next = NULL;
2224                         rte_pktmbuf_free_seg(rxm);
2225                 } else
2226                         rxm->data_len -= rxq->crc_len;
2227
2228                 /* Prefetch data of first segment, if configured to do so. */
2229                 rte_packet_prefetch((char *)first_seg->buf_addr +
2230                         first_seg->data_off);
2231
2232                 /*
2233                  * Store the mbuf address into the next entry of the array
2234                  * of returned packets.
2235                  */
2236                 rx_pkts[nb_rx++] = first_seg;
2237         }
2238
2239         /*
2240          * Record index of the next RX descriptor to probe.
2241          */
2242         rxq->rx_tail = rx_id;
2243
2244         /*
2245          * If the number of free RX descriptors is greater than the RX free
2246          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2247          * register.
2248          * Update the RDT with the value of the last processed RX descriptor
2249          * minus 1, to guarantee that the RDT register is never equal to the
2250          * RDH register, which creates a "full" ring situation from the
2251          * hardware point of view...
2252          */
2253         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2254                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2255                            "nb_hold=%u nb_rx=%u",
2256                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2257
2258                 rte_wmb();
2259                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2260                 nb_hold = 0;
2261         }
2262
2263         rxq->nb_rx_hold = nb_hold;
2264         return nb_rx;
2265 }
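
/*
 * Illustrative sketch, not part of the driver: the cluster assembly rule
 * applied in ixgbe_recv_pkts_lro() above.  The first segment becomes the
 * cluster HEAD; every later segment only updates the HEAD's totals and is
 * linked behind the previous segment.  The helper name is hypothetical,
 * the rte_mbuf fields are the ones used above.
 */
static inline struct rte_mbuf *
sketch_append_seg(struct rte_mbuf *head, struct rte_mbuf *prev,
                  struct rte_mbuf *seg, uint16_t data_len)
{
        seg->data_len = data_len;

        if (head == NULL) {
                /* first buffer of the packet: it becomes the HEAD */
                head = seg;
                head->pkt_len = data_len;
                head->nb_segs = 1;
        } else {
                head->pkt_len += data_len;
                head->nb_segs++;
                prev->next = seg;
        }
        return head;
}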
2266
2267 uint16_t
2268 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2269                                  uint16_t nb_pkts)
2270 {
2271         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2272 }
2273
2274 uint16_t
2275 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2276                                uint16_t nb_pkts)
2277 {
2278         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2279 }
2280
2281 /*********************************************************************
2282  *
2283  *  Queue management functions
2284  *
2285  **********************************************************************/
2286
2287 static void __attribute__((cold))
2288 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2289 {
2290         unsigned i;
2291
2292         if (txq->sw_ring != NULL) {
2293                 for (i = 0; i < txq->nb_tx_desc; i++) {
2294                         if (txq->sw_ring[i].mbuf != NULL) {
2295                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2296                                 txq->sw_ring[i].mbuf = NULL;
2297                         }
2298                 }
2299         }
2300 }
2301
2302 static int
2303 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2304 {
2305         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2306         uint16_t i, tx_last, tx_id;
2307         uint16_t nb_tx_free_last;
2308         uint16_t nb_tx_to_clean;
2309         uint32_t pkt_cnt;
2310
2311         /* Start free mbuf from the next of tx_tail */
2312         tx_last = txq->tx_tail;
2313         tx_id  = swr_ring[tx_last].next_id;
2314
2315         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2316                 return 0;
2317
2318         nb_tx_to_clean = txq->nb_tx_free;
2319         nb_tx_free_last = txq->nb_tx_free;
2320         if (!free_cnt)
2321                 free_cnt = txq->nb_tx_desc;
2322
2323         /* Loop through swr_ring to count the number of
2324          * freeable mbufs and packets.
2325          */
2326         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2327                 for (i = 0; i < nb_tx_to_clean &&
2328                         pkt_cnt < free_cnt &&
2329                         tx_id != tx_last; i++) {
2330                         if (swr_ring[tx_id].mbuf != NULL) {
2331                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2332                                 swr_ring[tx_id].mbuf = NULL;
2333
2334                                 /*
2335                                  * last segment in the packet,
2336                                  * increment packet count
2337                                  */
2338                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2339                         }
2340
2341                         tx_id = swr_ring[tx_id].next_id;
2342                 }
2343
2344                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2345                         txq->nb_tx_free || tx_id == tx_last)
2346                         break;
2347
2348                 if (pkt_cnt < free_cnt) {
2349                         if (ixgbe_xmit_cleanup(txq))
2350                                 break;
2351
2352                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2353                         nb_tx_free_last = txq->nb_tx_free;
2354                 }
2355         }
2356
2357         return (int)pkt_cnt;
2358 }
2359
2360 static int
2361 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2362                         uint32_t free_cnt)
2363 {
2364         int i, n, cnt;
2365
2366         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2367                 free_cnt = txq->nb_tx_desc;
2368
2369         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2370
2371         for (i = 0; i < cnt; i += n) {
2372                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2373                         break;
2374
2375                 n = ixgbe_tx_free_bufs(txq);
2376
2377                 if (n == 0)
2378                         break;
2379         }
2380
2381         return i;
2382 }
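
/*
 * Illustrative sketch, not part of the driver: the rounding applied above,
 * isolated.  The simple Tx path frees descriptors only in whole
 * tx_rs_thresh-sized batches, so the requested count is first clamped to
 * the ring size and then rounded down to a multiple of the threshold.
 * Names are hypothetical.
 */
static inline uint32_t
sketch_round_to_rs_thresh(uint32_t free_cnt, uint32_t nb_desc,
                          uint32_t rs_thresh)
{
        if (free_cnt == 0 || free_cnt > nb_desc)
                free_cnt = nb_desc;
        return free_cnt - free_cnt % rs_thresh;
}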
2383
2384 static int
2385 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2386                         uint32_t free_cnt __rte_unused)
2387 {
2388         return -ENOTSUP;
2389 }
2390
2391 int
2392 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2393 {
2394         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2395         if (txq->offloads == 0 &&
2396 #ifdef RTE_LIBRTE_SECURITY
2397                         !(txq->using_ipsec) &&
2398 #endif
2399                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2400                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2401                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2402                                         txq->sw_ring_v != NULL)) {
2403                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2404                 } else {
2405                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2406                 }
2407         }
2408
2409         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2410 }
2411
2412 static void __attribute__((cold))
2413 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2414 {
2415         if (txq != NULL &&
2416             txq->sw_ring != NULL)
2417                 rte_free(txq->sw_ring);
2418 }
2419
2420 static void __attribute__((cold))
2421 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2422 {
2423         if (txq != NULL && txq->ops != NULL) {
2424                 txq->ops->release_mbufs(txq);
2425                 txq->ops->free_swring(txq);
2426                 rte_free(txq);
2427         }
2428 }
2429
2430 void __attribute__((cold))
2431 ixgbe_dev_tx_queue_release(void *txq)
2432 {
2433         ixgbe_tx_queue_release(txq);
2434 }
2435
2436 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2437 static void __attribute__((cold))
2438 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2439 {
2440         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2441         struct ixgbe_tx_entry *txe = txq->sw_ring;
2442         uint16_t prev, i;
2443
2444         /* Zero out HW ring memory */
2445         for (i = 0; i < txq->nb_tx_desc; i++) {
2446                 txq->tx_ring[i] = zeroed_desc;
2447         }
2448
2449         /* Initialize SW ring entries */
2450         prev = (uint16_t) (txq->nb_tx_desc - 1);
2451         for (i = 0; i < txq->nb_tx_desc; i++) {
2452                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2453
2454                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2455                 txe[i].mbuf = NULL;
2456                 txe[i].last_id = i;
2457                 txe[prev].next_id = i;
2458                 prev = i;
2459         }
2460
2461         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2462         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2463
2464         txq->tx_tail = 0;
2465         txq->nb_tx_used = 0;
2466         /*
2467          * Always allow 1 descriptor to remain unallocated to avoid
2468          * an H/W race condition.
2469          */
2470         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2471         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2472         txq->ctx_curr = 0;
2473         memset((void *)&txq->ctx_cache, 0,
2474                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2475 }
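
/*
 * Illustrative sketch, not part of the driver: the circular linkage built
 * by ixgbe_reset_tx_queue() above, on a minimal stand-in structure.  Each
 * entry records itself as its own last_id and its predecessor's next_id
 * points at it, so the last entry links back to entry 0 and the software
 * ring forms a closed circle.  The struct and function names are
 * hypothetical.
 */
struct sketch_ring_entry {
        uint16_t next_id;
        uint16_t last_id;
};

static inline void
sketch_link_ring(struct sketch_ring_entry *ring, uint16_t nb_desc)
{
        uint16_t prev = (uint16_t)(nb_desc - 1);
        uint16_t i;

        for (i = 0; i < nb_desc; i++) {
                ring[i].last_id = i;
                ring[prev].next_id = i;
                prev = i;
        }
}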
2476
2477 static const struct ixgbe_txq_ops def_txq_ops = {
2478         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2479         .free_swring = ixgbe_tx_free_swring,
2480         .reset = ixgbe_reset_tx_queue,
2481 };
2482
2483 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2484  * the queue parameters. Used in tx_queue_setup by primary process and then
2485  * in dev_init by secondary process when attaching to an existing ethdev.
2486  */
2487 void __attribute__((cold))
2488 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2489 {
2490         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2491         if ((txq->offloads == 0) &&
2492 #ifdef RTE_LIBRTE_SECURITY
2493                         !(txq->using_ipsec) &&
2494 #endif
2495                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2496                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2497                 dev->tx_pkt_prepare = NULL;
2498                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2499                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2500                                         ixgbe_txq_vec_setup(txq) == 0)) {
2501                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2502                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2503                 } else
2504                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2505         } else {
2506                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2507                 PMD_INIT_LOG(DEBUG,
2508                                 " - offloads = 0x%" PRIx64,
2509                                 txq->offloads);
2510                 PMD_INIT_LOG(DEBUG,
2511                                 " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2512                                 (unsigned long)txq->tx_rs_thresh,
2513                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2514                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2515                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2516         }
2517 }
2518
2519 uint64_t
2520 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2521 {
2522         RTE_SET_USED(dev);
2523
2524         return 0;
2525 }
2526
2527 uint64_t
2528 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2529 {
2530         uint64_t tx_offload_capa;
2531         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2532
2533         tx_offload_capa =
2534                 DEV_TX_OFFLOAD_VLAN_INSERT |
2535                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2536                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2537                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2538                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2539                 DEV_TX_OFFLOAD_TCP_TSO     |
2540                 DEV_TX_OFFLOAD_MULTI_SEGS;
2541
2542         if (hw->mac.type == ixgbe_mac_82599EB ||
2543             hw->mac.type == ixgbe_mac_X540)
2544                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2545
2546         if (hw->mac.type == ixgbe_mac_X550 ||
2547             hw->mac.type == ixgbe_mac_X550EM_x ||
2548             hw->mac.type == ixgbe_mac_X550EM_a)
2549                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2550
2551 #ifdef RTE_LIBRTE_SECURITY
2552         if (dev->security_ctx)
2553                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2554 #endif
2555         return tx_offload_capa;
2556 }
2557
2558 int __attribute__((cold))
2559 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2560                          uint16_t queue_idx,
2561                          uint16_t nb_desc,
2562                          unsigned int socket_id,
2563                          const struct rte_eth_txconf *tx_conf)
2564 {
2565         const struct rte_memzone *tz;
2566         struct ixgbe_tx_queue *txq;
2567         struct ixgbe_hw     *hw;
2568         uint16_t tx_rs_thresh, tx_free_thresh;
2569         uint64_t offloads;
2570
2571         PMD_INIT_FUNC_TRACE();
2572         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2573
2574         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2575
2576         /*
2577          * Validate number of transmit descriptors.
2578          * It must not exceed hardware maximum, and must be multiple
2579          * of IXGBE_ALIGN.
2580          */
2581         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2582                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2583                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2584                 return -EINVAL;
2585         }
2586
2587         /*
2588          * The following two parameters control the setting of the RS bit on
2589          * transmit descriptors.
2590          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2591          * descriptors have been used.
2592          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2593          * descriptors are used or if the number of descriptors required
2594          * to transmit a packet is greater than the number of free TX
2595          * descriptors.
2596          * The following constraints must be satisfied:
2597          *  tx_rs_thresh must be greater than 0.
2598          *  tx_rs_thresh must be less than the size of the ring minus 2.
2599          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2600          *  tx_rs_thresh must be a divisor of the ring size.
2601          *  tx_free_thresh must be greater than 0.
2602          *  tx_free_thresh must be less than the size of the ring minus 3.
2603          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2604          * One descriptor in the TX ring is used as a sentinel to avoid a
2605          * H/W race condition, hence the maximum threshold constraints.
2606          * When set to zero use default values.
2607          */
2608         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2609                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2610         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2611         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2612                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2613         if (tx_conf->tx_rs_thresh > 0)
2614                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2615         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2616                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2617                              "exceed nb_desc. (tx_rs_thresh=%u "
2618                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2619                              (unsigned int)tx_rs_thresh,
2620                              (unsigned int)tx_free_thresh,
2621                              (unsigned int)nb_desc,
2622                              (int)dev->data->port_id,
2623                              (int)queue_idx);
2624                 return -(EINVAL);
2625         }
2626         if (tx_rs_thresh >= (nb_desc - 2)) {
2627                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2628                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2629                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2630                         (int)dev->data->port_id, (int)queue_idx);
2631                 return -(EINVAL);
2632         }
2633         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2634                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2635                         "(tx_rs_thresh=%u port=%d queue=%d)",
2636                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2637                         (int)dev->data->port_id, (int)queue_idx);
2638                 return -(EINVAL);
2639         }
2640         if (tx_free_thresh >= (nb_desc - 3)) {
2641                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2642                              "number of TX descriptors minus 3. "
2643                              "(tx_free_thresh=%u "
2644                              "port=%d queue=%d)",
2645                              (unsigned int)tx_free_thresh,
2646                              (int)dev->data->port_id, (int)queue_idx);
2647                 return -(EINVAL);
2648         }
2649         if (tx_rs_thresh > tx_free_thresh) {
2650                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2651                              "tx_free_thresh. (tx_free_thresh=%u "
2652                              "tx_rs_thresh=%u port=%d queue=%d)",
2653                              (unsigned int)tx_free_thresh,
2654                              (unsigned int)tx_rs_thresh,
2655                              (int)dev->data->port_id,
2656                              (int)queue_idx);
2657                 return -(EINVAL);
2658         }
2659         if ((nb_desc % tx_rs_thresh) != 0) {
2660                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2661                              "number of TX descriptors. (tx_rs_thresh=%u "
2662                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2663                              (int)dev->data->port_id, (int)queue_idx);
2664                 return -(EINVAL);
2665         }
2666
2667         /*
2668          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2669          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2670          * by the NIC and all descriptors are written back after the NIC
2671          * accumulates WTHRESH descriptors.
2672          */
2673         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2674                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2675                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2676                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2677                              (int)dev->data->port_id, (int)queue_idx);
2678                 return -(EINVAL);
2679         }
2680
2681         /* Free memory prior to re-allocation if needed... */
2682         if (dev->data->tx_queues[queue_idx] != NULL) {
2683                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2684                 dev->data->tx_queues[queue_idx] = NULL;
2685         }
2686
2687         /* First allocate the tx queue data structure */
2688         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2689                                  RTE_CACHE_LINE_SIZE, socket_id);
2690         if (txq == NULL)
2691                 return -ENOMEM;
2692
2693         /*
2694          * Allocate TX ring hardware descriptors. A memzone large enough to
2695          * handle the maximum ring size is allocated in order to allow for
2696          * resizing in later calls to the queue setup function.
2697          */
2698         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2699                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2700                         IXGBE_ALIGN, socket_id);
2701         if (tz == NULL) {
2702                 ixgbe_tx_queue_release(txq);
2703                 return -ENOMEM;
2704         }
2705
2706         txq->nb_tx_desc = nb_desc;
2707         txq->tx_rs_thresh = tx_rs_thresh;
2708         txq->tx_free_thresh = tx_free_thresh;
2709         txq->pthresh = tx_conf->tx_thresh.pthresh;
2710         txq->hthresh = tx_conf->tx_thresh.hthresh;
2711         txq->wthresh = tx_conf->tx_thresh.wthresh;
2712         txq->queue_id = queue_idx;
2713         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2714                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2715         txq->port_id = dev->data->port_id;
2716         txq->offloads = offloads;
2717         txq->ops = &def_txq_ops;
2718         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2719 #ifdef RTE_LIBRTE_SECURITY
2720         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2721                         DEV_TX_OFFLOAD_SECURITY);
2722 #endif
2723
2724         /*
2725          * Use VFTDT as the tail register when a virtual function (VF) is detected
2726          */
2727         if (hw->mac.type == ixgbe_mac_82599_vf ||
2728             hw->mac.type == ixgbe_mac_X540_vf ||
2729             hw->mac.type == ixgbe_mac_X550_vf ||
2730             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2731             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2732                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2733         else
2734                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2735
2736         txq->tx_ring_phys_addr = tz->iova;
2737         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2738
2739         /* Allocate software ring */
2740         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2741                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2742                                 RTE_CACHE_LINE_SIZE, socket_id);
2743         if (txq->sw_ring == NULL) {
2744                 ixgbe_tx_queue_release(txq);
2745                 return -ENOMEM;
2746         }
2747         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2748                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2749
2750         /* set up vector or scalar TX function as appropriate */
2751         ixgbe_set_tx_function(dev, txq);
2752
2753         txq->ops->reset(txq);
2754
2755         dev->data->tx_queues[queue_idx] = txq;
2756
2757
2758         return 0;
2759 }
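
/*
 * Illustrative sketch (not part of the driver): setting up a Tx queue from
 * an application with thresholds that satisfy the constraints enforced
 * above. nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 32 are
 * example values only: 32 divides 512, 32 <= 32, 32 + 32 <= 512, and
 * wthresh is 0 because tx_rs_thresh is greater than 1.
 */
#if 0   /* example only, not built */
static int
example_setup_tx_queue(uint16_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf = {
                .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
                .tx_rs_thresh = 32,     /* RS bit set every 32 descriptors */
                .tx_free_thresh = 32,   /* clean the ring once 32 are used */
                .offloads = 0,          /* no offloads -> simple/vector path */
        };

        /* 512 descriptors, allocated on the port's NUMA socket */
        return rte_eth_tx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id), &txconf);
}
#endif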
2760
2761 /**
2762  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2763  *
2764  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2765  * in the sw_rsc_ring is not set to NULL but rather points to the next
2766  * mbuf of this RSC aggregation (that has not been completed yet and still
2767  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2768  * will just free first "nb_segs" segments of the cluster explicitly by calling
2769  * an rte_pktmbuf_free_seg().
2770  *
2771  * @m scattered cluster head
2772  */
2773 static void __attribute__((cold))
2774 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2775 {
2776         uint16_t i, nb_segs = m->nb_segs;
2777         struct rte_mbuf *next_seg;
2778
2779         for (i = 0; i < nb_segs; i++) {
2780                 next_seg = m->next;
2781                 rte_pktmbuf_free_seg(m);
2782                 m = next_seg;
2783         }
2784 }
2785
2786 static void __attribute__((cold))
2787 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2788 {
2789         unsigned i;
2790
2791         /* SSE Vector driver has a different way of releasing mbufs. */
2792         if (rxq->rx_using_sse) {
2793                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2794                 return;
2795         }
2796
2797         if (rxq->sw_ring != NULL) {
2798                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2799                         if (rxq->sw_ring[i].mbuf != NULL) {
2800                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2801                                 rxq->sw_ring[i].mbuf = NULL;
2802                         }
2803                 }
2804                 if (rxq->rx_nb_avail) {
2805                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2806                                 struct rte_mbuf *mb;
2807
2808                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2809                                 rte_pktmbuf_free_seg(mb);
2810                         }
2811                         rxq->rx_nb_avail = 0;
2812                 }
2813         }
2814
2815         if (rxq->sw_sc_ring)
2816                 for (i = 0; i < rxq->nb_rx_desc; i++)
2817                         if (rxq->sw_sc_ring[i].fbuf) {
2818                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2819                                 rxq->sw_sc_ring[i].fbuf = NULL;
2820                         }
2821 }
2822
2823 static void __attribute__((cold))
2824 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2825 {
2826         if (rxq != NULL) {
2827                 ixgbe_rx_queue_release_mbufs(rxq);
2828                 rte_free(rxq->sw_ring);
2829                 rte_free(rxq->sw_sc_ring);
2830                 rte_free(rxq);
2831         }
2832 }
2833
2834 void __attribute__((cold))
2835 ixgbe_dev_rx_queue_release(void *rxq)
2836 {
2837         ixgbe_rx_queue_release(rxq);
2838 }
2839
2840 /*
2841  * Check if Rx Burst Bulk Alloc function can be used.
2842  * Return
2843  *        0: the preconditions are satisfied and the bulk allocation function
2844  *           can be used.
2845  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2846  *           function must be used.
2847  */
2848 static inline int __attribute__((cold))
2849 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2850 {
2851         int ret = 0;
2852
2853         /*
2854          * Make sure the following pre-conditions are satisfied:
2855          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2856          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2857          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2858          * Scattered packets are not supported.  This should be checked
2859          * outside of this function.
2860          */
2861         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2862                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2863                              "rxq->rx_free_thresh=%d, "
2864                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2865                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2866                 ret = -EINVAL;
2867         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2868                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2869                              "rxq->rx_free_thresh=%d, "
2870                              "rxq->nb_rx_desc=%d",
2871                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2872                 ret = -EINVAL;
2873         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2874                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2875                              "rxq->nb_rx_desc=%d, "
2876                              "rxq->rx_free_thresh=%d",
2877                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2878                 ret = -EINVAL;
2879         }
2880
2881         return ret;
2882 }
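
/*
 * Worked example (illustrative only): with nb_rx_desc = 512 and
 * rx_free_thresh = 32 all three preconditions above hold
 * (32 >= RTE_PMD_IXGBE_RX_MAX_BURST, 32 < 512 and 512 % 32 == 0), so the
 * bulk allocation path stays enabled; rx_free_thresh = 24 would fail the
 * divisor check and disable the feature for the whole port. The helper
 * below simply mirrors those checks for arbitrary values.
 */
#if 0   /* example only, not built */
static inline int
example_bulk_alloc_ok(uint16_t nb_rx_desc, uint16_t rx_free_thresh)
{
        return rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST &&
               rx_free_thresh < nb_rx_desc &&
               (nb_rx_desc % rx_free_thresh) == 0;
}
#endif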
2883
2884 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2885 static void __attribute__((cold))
2886 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2887 {
2888         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2889         unsigned i;
2890         uint16_t len = rxq->nb_rx_desc;
2891
2892         /*
2893          * By default, the Rx queue setup function allocates enough memory for
2894          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2895          * extra memory at the end of the descriptor ring to be zero'd out.
2896          */
2897         if (adapter->rx_bulk_alloc_allowed)
2898                 /* zero out extra memory */
2899                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2900
2901         /*
2902          * Zero out HW ring memory. Zero out extra memory at the end of
2903          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2904          * reads extra memory as zeros.
2905          */
2906         for (i = 0; i < len; i++) {
2907                 rxq->rx_ring[i] = zeroed_desc;
2908         }
2909
2910         /*
2911          * initialize extra software ring entries. Space for these extra
2912          * entries is always allocated
2913          */
2914         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2915         for (i = rxq->nb_rx_desc; i < len; ++i) {
2916                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2917         }
2918
2919         rxq->rx_nb_avail = 0;
2920         rxq->rx_next_avail = 0;
2921         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2922         rxq->rx_tail = 0;
2923         rxq->nb_rx_hold = 0;
2924         rxq->pkt_first_seg = NULL;
2925         rxq->pkt_last_seg = NULL;
2926
2927 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2928         rxq->rxrearm_start = 0;
2929         rxq->rxrearm_nb = 0;
2930 #endif
2931 }
2932
2933 static int
2934 ixgbe_is_vf(struct rte_eth_dev *dev)
2935 {
2936         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2937
2938         switch (hw->mac.type) {
2939         case ixgbe_mac_82599_vf:
2940         case ixgbe_mac_X540_vf:
2941         case ixgbe_mac_X550_vf:
2942         case ixgbe_mac_X550EM_x_vf:
2943         case ixgbe_mac_X550EM_a_vf:
2944                 return 1;
2945         default:
2946                 return 0;
2947         }
2948 }
2949
2950 uint64_t
2951 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2952 {
2953         uint64_t offloads = 0;
2954         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2955
2956         if (hw->mac.type != ixgbe_mac_82598EB)
2957                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2958
2959         return offloads;
2960 }
2961
2962 uint64_t
2963 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2964 {
2965         uint64_t offloads;
2966         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2967
2968         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2969                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2970                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2971                    DEV_RX_OFFLOAD_KEEP_CRC    |
2972                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2973                    DEV_RX_OFFLOAD_VLAN_FILTER |
2974                    DEV_RX_OFFLOAD_SCATTER |
2975                    DEV_RX_OFFLOAD_RSS_HASH;
2976
2977         if (hw->mac.type == ixgbe_mac_82598EB)
2978                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2979
2980         if (ixgbe_is_vf(dev) == 0)
2981                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2982
2983         /*
2984          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2985          * mode.
2986          */
2987         if ((hw->mac.type == ixgbe_mac_82599EB ||
2988              hw->mac.type == ixgbe_mac_X540 ||
2989              hw->mac.type == ixgbe_mac_X550) &&
2990             !RTE_ETH_DEV_SRIOV(dev).active)
2991                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2992
2993         if (hw->mac.type == ixgbe_mac_82599EB ||
2994             hw->mac.type == ixgbe_mac_X540)
2995                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
2996
2997         if (hw->mac.type == ixgbe_mac_X550 ||
2998             hw->mac.type == ixgbe_mac_X550EM_x ||
2999             hw->mac.type == ixgbe_mac_X550EM_a)
3000                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3001
3002 #ifdef RTE_LIBRTE_SECURITY
3003         if (dev->security_ctx)
3004                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3005 #endif
3006
3007         return offloads;
3008 }
3009
3010 int __attribute__((cold))
3011 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3012                          uint16_t queue_idx,
3013                          uint16_t nb_desc,
3014                          unsigned int socket_id,
3015                          const struct rte_eth_rxconf *rx_conf,
3016                          struct rte_mempool *mp)
3017 {
3018         const struct rte_memzone *rz;
3019         struct ixgbe_rx_queue *rxq;
3020         struct ixgbe_hw     *hw;
3021         uint16_t len;
3022         struct ixgbe_adapter *adapter = dev->data->dev_private;
3023         uint64_t offloads;
3024
3025         PMD_INIT_FUNC_TRACE();
3026         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3027
3028         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3029
3030         /*
3031          * Validate number of receive descriptors.
3032          * It must not exceed hardware maximum, and must be multiple
3033          * of IXGBE_ALIGN.
3034          */
3035         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3036                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3037                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3038                 return -EINVAL;
3039         }
3040
3041         /* Free memory prior to re-allocation if needed... */
3042         if (dev->data->rx_queues[queue_idx] != NULL) {
3043                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3044                 dev->data->rx_queues[queue_idx] = NULL;
3045         }
3046
3047         /* First allocate the rx queue data structure */
3048         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3049                                  RTE_CACHE_LINE_SIZE, socket_id);
3050         if (rxq == NULL)
3051                 return -ENOMEM;
3052         rxq->mb_pool = mp;
3053         rxq->nb_rx_desc = nb_desc;
3054         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3055         rxq->queue_id = queue_idx;
3056         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3057                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3058         rxq->port_id = dev->data->port_id;
3059         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3060                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3061         else
3062                 rxq->crc_len = 0;
3063         rxq->drop_en = rx_conf->rx_drop_en;
3064         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3065         rxq->offloads = offloads;
3066
3067         /*
3068          * The packet type field in the Rx descriptor differs between NICs.
3069          * Some bits are used by X550 but reserved on other NICs,
3070          * so set a different mask for each NIC family.
3071          */
3072         if (hw->mac.type == ixgbe_mac_X550 ||
3073             hw->mac.type == ixgbe_mac_X550EM_x ||
3074             hw->mac.type == ixgbe_mac_X550EM_a ||
3075             hw->mac.type == ixgbe_mac_X550_vf ||
3076             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3077             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3078                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3079         else
3080                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3081
3082         /*
3083          * Allocate RX ring hardware descriptors. A memzone large enough to
3084          * handle the maximum ring size is allocated in order to allow for
3085          * resizing in later calls to the queue setup function.
3086          */
3087         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3088                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3089         if (rz == NULL) {
3090                 ixgbe_rx_queue_release(rxq);
3091                 return -ENOMEM;
3092         }
3093
3094         /*
3095          * Zero init all the descriptors in the ring.
3096          */
3097         memset(rz->addr, 0, RX_RING_SZ);
3098
3099         /*
3100          * Use VFRDT/VFRDH as the ring registers for a Virtual Function (VF)
3101          */
3102         if (hw->mac.type == ixgbe_mac_82599_vf ||
3103             hw->mac.type == ixgbe_mac_X540_vf ||
3104             hw->mac.type == ixgbe_mac_X550_vf ||
3105             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3106             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3107                 rxq->rdt_reg_addr =
3108                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3109                 rxq->rdh_reg_addr =
3110                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3111         } else {
3112                 rxq->rdt_reg_addr =
3113                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3114                 rxq->rdh_reg_addr =
3115                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3116         }
3117
3118         rxq->rx_ring_phys_addr = rz->iova;
3119         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3120
3121         /*
3122          * Certain constraints must be met in order to use the bulk buffer
3123          * allocation Rx burst function. If any of the Rx queues doesn't meet
3124          * them, the feature should be disabled for the whole port.
3125          */
3126         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3127                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3128                                     "preconditions - canceling the feature for "
3129                                     "the whole port[%d]",
3130                              rxq->queue_id, rxq->port_id);
3131                 adapter->rx_bulk_alloc_allowed = false;
3132         }
3133
3134         /*
3135          * Allocate software ring. Allow for space at the end of the
3136          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3137          * function does not access an invalid memory region.
3138          */
3139         len = nb_desc;
3140         if (adapter->rx_bulk_alloc_allowed)
3141                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3142
3143         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3144                                           sizeof(struct ixgbe_rx_entry) * len,
3145                                           RTE_CACHE_LINE_SIZE, socket_id);
3146         if (!rxq->sw_ring) {
3147                 ixgbe_rx_queue_release(rxq);
3148                 return -ENOMEM;
3149         }
3150
3151         /*
3152          * Always allocate even if it's not going to be needed in order to
3153          * simplify the code.
3154          *
3155          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3156          * be requested in ixgbe_dev_rx_init(), which is called later from
3157          * dev_start() flow.
3158          */
3159         rxq->sw_sc_ring =
3160                 rte_zmalloc_socket("rxq->sw_sc_ring",
3161                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3162                                    RTE_CACHE_LINE_SIZE, socket_id);
3163         if (!rxq->sw_sc_ring) {
3164                 ixgbe_rx_queue_release(rxq);
3165                 return -ENOMEM;
3166         }
3167
3168         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3169                             "dma_addr=0x%"PRIx64,
3170                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3171                      rxq->rx_ring_phys_addr);
3172
3173         if (!rte_is_power_of_2(nb_desc)) {
3174                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3175                                     "preconditions - canceling the feature for "
3176                                     "the whole port[%d]",
3177                              rxq->queue_id, rxq->port_id);
3178                 adapter->rx_vec_allowed = false;
3179         } else
3180                 ixgbe_rxq_vec_setup(rxq);
3181
3182         dev->data->rx_queues[queue_idx] = rxq;
3183
3184         ixgbe_reset_rx_queue(adapter, rxq);
3185
3186         return 0;
3187 }
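
/*
 * Illustrative sketch (not part of the driver): Rx queue setup from an
 * application. The mempool size, cache size and descriptor count are
 * example values; rx_free_thresh = 32 keeps the bulk-alloc preconditions
 * satisfied for a 512-entry ring, and 512 is a power of two so the vector
 * Rx path above also stays usable.
 */
#if 0   /* example only, not built */
static int
example_setup_rx_queue(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mempool *mp;
        struct rte_eth_rxconf rxconf = {
                .rx_free_thresh = 32,
                .rx_drop_en = 0,
        };

        mp = rte_pktmbuf_pool_create("example_rx_pool", 8192, 256, 0,
                                     RTE_MBUF_DEFAULT_BUF_SIZE,
                                     rte_eth_dev_socket_id(port_id));
        if (mp == NULL)
                return -rte_errno;

        return rte_eth_rx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id),
                                      &rxconf, mp);
}
#endif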
3188
3189 uint32_t
3190 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3191 {
3192 #define IXGBE_RXQ_SCAN_INTERVAL 4
3193         volatile union ixgbe_adv_rx_desc *rxdp;
3194         struct ixgbe_rx_queue *rxq;
3195         uint32_t desc = 0;
3196
3197         rxq = dev->data->rx_queues[rx_queue_id];
3198         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3199
3200         while ((desc < rxq->nb_rx_desc) &&
3201                 (rxdp->wb.upper.status_error &
3202                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3203                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3204                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3205                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3206                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3207                                 desc - rxq->nb_rx_desc]);
3208         }
3209
3210         return desc;
3211 }
3212
3213 int
3214 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3215 {
3216         volatile union ixgbe_adv_rx_desc *rxdp;
3217         struct ixgbe_rx_queue *rxq = rx_queue;
3218         uint32_t desc;
3219
3220         if (unlikely(offset >= rxq->nb_rx_desc))
3221                 return 0;
3222         desc = rxq->rx_tail + offset;
3223         if (desc >= rxq->nb_rx_desc)
3224                 desc -= rxq->nb_rx_desc;
3225
3226         rxdp = &rxq->rx_ring[desc];
3227         return !!(rxdp->wb.upper.status_error &
3228                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3229 }
3230
3231 int
3232 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3233 {
3234         struct ixgbe_rx_queue *rxq = rx_queue;
3235         volatile uint32_t *status;
3236         uint32_t nb_hold, desc;
3237
3238         if (unlikely(offset >= rxq->nb_rx_desc))
3239                 return -EINVAL;
3240
3241 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3242         if (rxq->rx_using_sse)
3243                 nb_hold = rxq->rxrearm_nb;
3244         else
3245 #endif
3246                 nb_hold = rxq->nb_rx_hold;
3247         if (offset >= rxq->nb_rx_desc - nb_hold)
3248                 return RTE_ETH_RX_DESC_UNAVAIL;
3249
3250         desc = rxq->rx_tail + offset;
3251         if (desc >= rxq->nb_rx_desc)
3252                 desc -= rxq->nb_rx_desc;
3253
3254         status = &rxq->rx_ring[desc].wb.upper.status_error;
3255         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3256                 return RTE_ETH_RX_DESC_DONE;
3257
3258         return RTE_ETH_RX_DESC_AVAIL;
3259 }
3260
3261 int
3262 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3263 {
3264         struct ixgbe_tx_queue *txq = tx_queue;
3265         volatile uint32_t *status;
3266         uint32_t desc;
3267
3268         if (unlikely(offset >= txq->nb_tx_desc))
3269                 return -EINVAL;
3270
3271         desc = txq->tx_tail + offset;
3272         /* go to next desc that has the RS bit */
3273         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3274                 txq->tx_rs_thresh;
3275         if (desc >= txq->nb_tx_desc) {
3276                 desc -= txq->nb_tx_desc;
3277                 if (desc >= txq->nb_tx_desc)
3278                         desc -= txq->nb_tx_desc;
3279         }
3280
3281         status = &txq->tx_ring[desc].wb.status;
3282         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3283                 return RTE_ETH_TX_DESC_DONE;
3284
3285         return RTE_ETH_TX_DESC_FULL;
3286 }
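
/*
 * Worked example (illustrative only) of the RS rounding above: with
 * tx_tail = 100, offset = 5 and tx_rs_thresh = 32, desc = 105 is rounded
 * up to 128, the next descriptor that carries the RS bit and therefore the
 * only one whose DD bit the hardware is guaranteed to write back.
 */
#if 0   /* example only, not built */
static inline uint16_t
example_next_rs_desc(uint16_t desc, uint16_t tx_rs_thresh)
{
        /* round up to the next multiple of tx_rs_thresh */
        return (uint16_t)(((desc + tx_rs_thresh - 1) / tx_rs_thresh) *
                          tx_rs_thresh);
}
#endif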
3287
3288 /*
3289  * Set up link loopback for X540/X550 mode Tx->Rx.
3290  */
3291 static inline void __attribute__((cold))
3292 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3293 {
3294         uint32_t macc;
3295         PMD_INIT_FUNC_TRACE();
3296
3297         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3298
3299         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3300                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3301         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3302
3303         if (enable) {
3304                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3305                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3306                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3307                 macc |= IXGBE_MACC_FLU;
3308         } else {
3309                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3310                 macc &= ~IXGBE_MACC_FLU;
3311         }
3312
3313         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3314                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3315
3316         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3317 }
3318
3319 void __attribute__((cold))
3320 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3321 {
3322         unsigned i;
3323         struct ixgbe_adapter *adapter = dev->data->dev_private;
3324         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3325
3326         PMD_INIT_FUNC_TRACE();
3327
3328         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3329                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3330
3331                 if (txq != NULL) {
3332                         txq->ops->release_mbufs(txq);
3333                         txq->ops->reset(txq);
3334                 }
3335         }
3336
3337         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3338                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3339
3340                 if (rxq != NULL) {
3341                         ixgbe_rx_queue_release_mbufs(rxq);
3342                         ixgbe_reset_rx_queue(adapter, rxq);
3343                 }
3344         }
3345         /* If loopback mode was enabled, reconfigure the link accordingly */
3346         if (dev->data->dev_conf.lpbk_mode != 0) {
3347                 if (hw->mac.type == ixgbe_mac_X540 ||
3348                      hw->mac.type == ixgbe_mac_X550 ||
3349                      hw->mac.type == ixgbe_mac_X550EM_x ||
3350                      hw->mac.type == ixgbe_mac_X550EM_a)
3351                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3352         }
3353 }
3354
3355 void
3356 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3357 {
3358         unsigned i;
3359
3360         PMD_INIT_FUNC_TRACE();
3361
3362         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3363                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3364                 dev->data->rx_queues[i] = NULL;
3365         }
3366         dev->data->nb_rx_queues = 0;
3367
3368         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3369                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3370                 dev->data->tx_queues[i] = NULL;
3371         }
3372         dev->data->nb_tx_queues = 0;
3373 }
3374
3375 /*********************************************************************
3376  *
3377  *  Device RX/TX init functions
3378  *
3379  **********************************************************************/
3380
3381 /**
3382  * Receive Side Scaling (RSS)
3383  * See section 7.1.2.8 in the following document:
3384  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3385  *
3386  * Principles:
3387  * The source and destination IP addresses of the IP header and the source
3388  * and destination ports of TCP/UDP headers, if any, of received packets are
3389  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3390  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3391  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3392  * RSS output index which is used as the RX queue index where to store the
3393  * received packets.
3394  * The following output is supplied in the RX write-back descriptor:
3395  *     - 32-bit result of the Microsoft RSS hash function,
3396  *     - 4-bit RSS type field.
3397  */
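
/*
 * Illustrative sketch (not part of the driver): how the 32-bit RSS hash
 * reported in the Rx descriptor (and copied to mbuf->hash.rss) selects an
 * Rx queue. The 7 least significant bits index the 128-entry RETA and the
 * queue stored there is the one the hardware used. The reta[] contents are
 * application-chosen values, e.g. as programmed via the RETA update API.
 */
#if 0   /* example only, not built */
static inline uint16_t
example_rss_queue_from_hash(uint32_t rss_hash, const uint16_t reta[128])
{
        /* 7 LSBs of the hash -> RETA index (0..127) */
        return reta[rss_hash & 0x7F];
}
#endif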
3398
3399 /*
3400  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3401  * Used as the default key.
3402  */
3403 static uint8_t rss_intel_key[40] = {
3404         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3405         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3406         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3407         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3408         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3409 };
3410
3411 static void
3412 ixgbe_rss_disable(struct rte_eth_dev *dev)
3413 {
3414         struct ixgbe_hw *hw;
3415         uint32_t mrqc;
3416         uint32_t mrqc_reg;
3417
3418         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3419         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3420         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3421         mrqc &= ~IXGBE_MRQC_RSSEN;
3422         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3423 }
3424
3425 static void
3426 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3427 {
3428         uint8_t  *hash_key;
3429         uint32_t mrqc;
3430         uint32_t rss_key;
3431         uint64_t rss_hf;
3432         uint16_t i;
3433         uint32_t mrqc_reg;
3434         uint32_t rssrk_reg;
3435
3436         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3437         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3438
3439         hash_key = rss_conf->rss_key;
3440         if (hash_key != NULL) {
3441                 /* Fill in RSS hash key */
3442                 for (i = 0; i < 10; i++) {
3443                         rss_key  = hash_key[(i * 4)];
3444                         rss_key |= hash_key[(i * 4) + 1] << 8;
3445                         rss_key |= hash_key[(i * 4) + 2] << 16;
3446                         rss_key |= hash_key[(i * 4) + 3] << 24;
3447                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3448                 }
3449         }
3450
3451         /* Set configured hashing protocols in MRQC register */
3452         rss_hf = rss_conf->rss_hf;
3453         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3454         if (rss_hf & ETH_RSS_IPV4)
3455                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3456         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3457                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3458         if (rss_hf & ETH_RSS_IPV6)
3459                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3460         if (rss_hf & ETH_RSS_IPV6_EX)
3461                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3462         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3463                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3464         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3465                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3466         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3467                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3468         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3469                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3470         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3471                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3472         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3473 }
3474
3475 int
3476 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3477                           struct rte_eth_rss_conf *rss_conf)
3478 {
3479         struct ixgbe_hw *hw;
3480         uint32_t mrqc;
3481         uint64_t rss_hf;
3482         uint32_t mrqc_reg;
3483
3484         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3485
3486         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3487                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3488                         "NIC.");
3489                 return -ENOTSUP;
3490         }
3491         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3492
3493         /*
3494          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3495          *     "RSS enabling cannot be done dynamically while it must be
3496          *      preceded by a software reset"
3497          * Before changing anything, first check that the update RSS operation
3498          * does not attempt to disable RSS, if RSS was enabled at
3499          * initialization time, or does not attempt to enable RSS, if RSS was
3500          * disabled at initialization time.
3501          */
3502         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3503         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3504         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3505                 if (rss_hf != 0) /* Enable RSS */
3506                         return -(EINVAL);
3507                 return 0; /* Nothing to do */
3508         }
3509         /* RSS enabled */
3510         if (rss_hf == 0) /* Disable RSS */
3511                 return -(EINVAL);
3512         ixgbe_hw_rss_hash_set(hw, rss_conf);
3513         return 0;
3514 }
3515
3516 int
3517 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3518                             struct rte_eth_rss_conf *rss_conf)
3519 {
3520         struct ixgbe_hw *hw;
3521         uint8_t *hash_key;
3522         uint32_t mrqc;
3523         uint32_t rss_key;
3524         uint64_t rss_hf;
3525         uint16_t i;
3526         uint32_t mrqc_reg;
3527         uint32_t rssrk_reg;
3528
3529         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3530         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3531         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3532         hash_key = rss_conf->rss_key;
3533         if (hash_key != NULL) {
3534                 /* Return RSS hash key */
3535                 for (i = 0; i < 10; i++) {
3536                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3537                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3538                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3539                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3540                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3541                 }
3542         }
3543
3544         /* Get RSS functions configured in MRQC register */
3545         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3546         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3547                 rss_conf->rss_hf = 0;
3548                 return 0;
3549         }
3550         rss_hf = 0;
3551         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3552                 rss_hf |= ETH_RSS_IPV4;
3553         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3554                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3555         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3556                 rss_hf |= ETH_RSS_IPV6;
3557         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3558                 rss_hf |= ETH_RSS_IPV6_EX;
3559         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3560                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3561         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3562                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3563         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3564                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3565         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3566                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3567         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3568                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3569         rss_conf->rss_hf = rss_hf;
3570         return 0;
3571 }
3572
3573 static void
3574 ixgbe_rss_configure(struct rte_eth_dev *dev)
3575 {
3576         struct rte_eth_rss_conf rss_conf;
3577         struct ixgbe_adapter *adapter;
3578         struct ixgbe_hw *hw;
3579         uint32_t reta;
3580         uint16_t i;
3581         uint16_t j;
3582         uint16_t sp_reta_size;
3583         uint32_t reta_reg;
3584
3585         PMD_INIT_FUNC_TRACE();
3586         adapter = dev->data->dev_private;
3587         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3588
3589         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3590
3591         /*
3592          * Fill in redirection table
3593          * The byte-swap is needed because NIC registers are in
3594          * little-endian order.
3595          */
3596         if (adapter->rss_reta_updated == 0) {
3597                 reta = 0;
3598                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3599                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3600
3601                         if (j == dev->data->nb_rx_queues)
3602                                 j = 0;
3603                         reta = (reta << 8) | j;
3604                         if ((i & 3) == 3)
3605                                 IXGBE_WRITE_REG(hw, reta_reg,
3606                                                 rte_bswap32(reta));
3607                 }
3608         }
3609
3610         /*
3611          * Configure the RSS key and the RSS protocols used to compute
3612          * the RSS hash of input packets.
3613          */
3614         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3615         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3616                 ixgbe_rss_disable(dev);
3617                 return;
3618         }
3619         if (rss_conf.rss_key == NULL)
3620                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3621         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3622 }
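
/*
 * Illustrative sketch (not part of the driver): enabling RSS from an
 * application so that ixgbe_rss_configure() programs the key and MRQC.
 * Passing rss_key = NULL selects the default rss_intel_key above; the
 * rss_hf value is an example subset of the hash types that
 * ixgbe_hw_rss_hash_set() maps to MRQC bits.
 */
#if 0   /* example only, not built */
static int
example_enable_rss(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
        struct rte_eth_conf port_conf = {
                .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
                .rx_adv_conf = {
                        .rss_conf = {
                                .rss_key = NULL,        /* use default key */
                                .rss_hf = ETH_RSS_IPV4 |
                                          ETH_RSS_NONFRAG_IPV4_TCP |
                                          ETH_RSS_NONFRAG_IPV4_UDP |
                                          ETH_RSS_IPV6 |
                                          ETH_RSS_NONFRAG_IPV6_TCP,
                        },
                },
        };

        return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
}
#endif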
3623
3624 #define NUM_VFTA_REGISTERS 128
3625 #define NIC_RX_BUFFER_SIZE 0x200
3626 #define X550_RX_BUFFER_SIZE 0x180
3627
3628 static void
3629 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3630 {
3631         struct rte_eth_vmdq_dcb_conf *cfg;
3632         struct ixgbe_hw *hw;
3633         enum rte_eth_nb_pools num_pools;
3634         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3635         uint16_t pbsize;
3636         uint8_t nb_tcs; /* number of traffic classes */
3637         int i;
3638
3639         PMD_INIT_FUNC_TRACE();
3640         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3641         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3642         num_pools = cfg->nb_queue_pools;
3643         /* Check we have a valid number of pools */
3644         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3645                 ixgbe_rss_disable(dev);
3646                 return;
3647         }
3648         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3649         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3650
3651         /*
3652          * RXPBSIZE
3653          * split rx buffer up into sections, each for 1 traffic class
3654          */
3655         switch (hw->mac.type) {
3656         case ixgbe_mac_X550:
3657         case ixgbe_mac_X550EM_x:
3658         case ixgbe_mac_X550EM_a:
3659                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3660                 break;
3661         default:
3662                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3663                 break;
3664         }
3665         for (i = 0; i < nb_tcs; i++) {
3666                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3667
3668                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3669                 /* clear 10 bits. */
3670                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3671                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3672         }
3673         /* zero alloc all unused TCs */
3674         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3675                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3676
3677                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3678                 /* clear 10 bits. */
3679                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3680         }
3681
3682         /* MRQC: enable vmdq and dcb */
3683         mrqc = (num_pools == ETH_16_POOLS) ?
3684                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3685         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3686
3687         /* PFVTCTL: turn on virtualisation and set the default pool */
3688         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3689         if (cfg->enable_default_pool) {
3690                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3691         } else {
3692                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3693         }
3694
3695         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3696
3697         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3698         queue_mapping = 0;
3699         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3700                 /*
3701                  * mapping is done with 3 bits per priority,
3702                  * so shift by i*3 each time
3703                  */
3704                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3705
3706         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3707
3708         /* RTRPCS: DCB related */
3709         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3710
3711         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3712         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3713         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3714         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3715
3716         /* VFTA - enable all vlan filters */
3717         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3718                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3719         }
3720
3721         /* VFRE: pool enabling for receive - 16 or 32 */
3722         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3723                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3724
3725         /*
3726          * MPSAR - allow pools to read specific mac addresses
3727          * In this case, all pools should be able to read from mac addr 0
3728          */
3729         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3730         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3731
3732         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3733         for (i = 0; i < cfg->nb_pool_maps; i++) {
3734                 /* set vlan id in VF register and set the valid bit */
3735                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3736                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3737                 /*
3738                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3739                  * pools, we only need to use the first half of the register
3740                  * i.e. bits 0-31
3741                  */
3742                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3743         }
3744 }
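
/*
 * Worked example (illustrative only) of the RTRUP2TC packing above: each
 * user priority gets 3 bits, so dcb_tc[] = {0, 0, 1, 1, 2, 2, 3, 3} packs
 * to 0x6D2240 (fields 011 011 010 010 001 001 000 000, priority 7 in the
 * top field). The helper mirrors the loop in ixgbe_vmdq_dcb_configure()
 * for an arbitrary mapping.
 */
#if 0   /* example only, not built */
static inline uint32_t
example_pack_up2tc(const uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES])
{
        uint32_t map = 0;
        int i;

        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
                map |= (uint32_t)(dcb_tc[i] & 0x07) << (i * 3);
        return map;
}
#endif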
3745
3746 /**
3747  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3748  * @dev: pointer to eth_dev structure
3749  * @dcb_config: pointer to ixgbe_dcb_config structure
3750  */
3751 static void
3752 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3753                        struct ixgbe_dcb_config *dcb_config)
3754 {
3755         uint32_t reg;
3756         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3757
3758         PMD_INIT_FUNC_TRACE();
3759         if (hw->mac.type != ixgbe_mac_82598EB) {
3760                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3761                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3762                 reg |= IXGBE_RTTDCS_ARBDIS;
3763                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3764
3765                 /* Enable DCB for Tx with 8 TCs */
3766                 if (dcb_config->num_tcs.pg_tcs == 8) {
3767                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3768                 } else {
3769                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3770                 }
3771                 if (dcb_config->vt_mode)
3772                         reg |= IXGBE_MTQC_VT_ENA;
3773                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3774
3775                 /* Enable the Tx desc arbiter */
3776                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3777                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3778                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3779
3780                 /* Enable Security TX Buffer IFG for DCB */
3781                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3782                 reg |= IXGBE_SECTX_DCB;
3783                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3784         }
3785 }
3786
3787 /**
3788  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3789  * @dev: pointer to rte_eth_dev structure
3790  * @dcb_config: pointer to ixgbe_dcb_config structure
3791  */
3792 static void
3793 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3794                         struct ixgbe_dcb_config *dcb_config)
3795 {
3796         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3797                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3798         struct ixgbe_hw *hw =
3799                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3800
3801         PMD_INIT_FUNC_TRACE();
3802         if (hw->mac.type != ixgbe_mac_82598EB)
3803                 /*PF VF Transmit Enable*/
3804                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3805                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3806
3807         /*Configure general DCB TX parameters*/
3808         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3809 }
3810
3811 static void
3812 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3813                         struct ixgbe_dcb_config *dcb_config)
3814 {
3815         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3816                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3817         struct ixgbe_dcb_tc_config *tc;
3818         uint8_t i, j;
3819
3820         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3821         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3822                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3823                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3824         } else {
3825                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3826                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3827         }
3828
3829         /* Initialize User Priority to Traffic Class mapping */
3830         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3831                 tc = &dcb_config->tc_config[j];
3832                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3833         }
3834
3835         /* User Priority to Traffic Class mapping */
3836         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3837                 j = vmdq_rx_conf->dcb_tc[i];
3838                 tc = &dcb_config->tc_config[j];
3839                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3840                                                 (uint8_t)(1 << i);
3841         }
3842 }
3843
3844 static void
3845 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3846                         struct ixgbe_dcb_config *dcb_config)
3847 {
3848         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3849                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3850         struct ixgbe_dcb_tc_config *tc;
3851         uint8_t i, j;
3852
3853         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3854         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3855                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3856                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3857         } else {
3858                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3859                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3860         }
3861
3862         /* Initialize User Priority to Traffic Class mapping */
3863         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3864                 tc = &dcb_config->tc_config[j];
3865                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3866         }
3867
3868         /* User Priority to Traffic Class mapping */
3869         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3870                 j = vmdq_tx_conf->dcb_tc[i];
3871                 tc = &dcb_config->tc_config[j];
3872                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3873                                                 (uint8_t)(1 << i);
3874         }
3875 }
3876
3877 static void
3878 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3879                 struct ixgbe_dcb_config *dcb_config)
3880 {
3881         struct rte_eth_dcb_rx_conf *rx_conf =
3882                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3883         struct ixgbe_dcb_tc_config *tc;
3884         uint8_t i, j;
3885
3886         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3887         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3888
3889         /* Initialize User Priority to Traffic Class mapping */
3890         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3891                 tc = &dcb_config->tc_config[j];
3892                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3893         }
3894
3895         /* User Priority to Traffic Class mapping */
3896         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3897                 j = rx_conf->dcb_tc[i];
3898                 tc = &dcb_config->tc_config[j];
3899                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3900                                                 (uint8_t)(1 << i);
3901         }
3902 }
3903
3904 static void
3905 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3906                 struct ixgbe_dcb_config *dcb_config)
3907 {
3908         struct rte_eth_dcb_tx_conf *tx_conf =
3909                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3910         struct ixgbe_dcb_tc_config *tc;
3911         uint8_t i, j;
3912
3913         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3914         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3915
3916         /* Initialize User Priority to Traffic Class mapping */
3917         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3918                 tc = &dcb_config->tc_config[j];
3919                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3920         }
3921
3922         /* User Priority to Traffic Class mapping */
3923         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3924                 j = tx_conf->dcb_tc[i];
3925                 tc = &dcb_config->tc_config[j];
3926                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3927                                                 (uint8_t)(1 << i);
3928         }
3929 }
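
/*
 * Illustrative sketch (assumed application-side code, not part of this
 * driver): the helpers above only translate what the application placed
 * in rte_eth_conf before rte_eth_dev_configure(). An 8-TC setup with a
 * one-to-one priority-to-TC mapping would look roughly like:
 *
 *        struct rte_eth_conf conf = { 0 };
 *        uint8_t up;
 *
 *        conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
 *        conf.txmode.mq_mode = ETH_MQ_TX_DCB;
 *        conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_8_TCS;
 *        conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_8_TCS;
 *        for (up = 0; up < ETH_DCB_NUM_USER_PRIORITIES; up++) {
 *                conf.rx_adv_conf.dcb_rx_conf.dcb_tc[up] = up;
 *                conf.tx_adv_conf.dcb_tx_conf.dcb_tc[up] = up;
 *        }
 *        rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */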
3930
3931 /**
3932  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3933  * @dev: pointer to eth_dev structure
3934  * @dcb_config: pointer to ixgbe_dcb_config structure
3935  */
3936 static void
3937 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3938                        struct ixgbe_dcb_config *dcb_config)
3939 {
3940         uint32_t reg;
3941         uint32_t vlanctrl;
3942         uint8_t i;
3943         uint32_t q;
3944         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3945
3946         PMD_INIT_FUNC_TRACE();
3947         /*
3948          * Disable the arbiter before changing parameters
3949          * (always enable recycle mode; WSP)
3950          */
3951         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3952         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3953
3954         if (hw->mac.type != ixgbe_mac_82598EB) {
3955                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3956                 if (dcb_config->num_tcs.pg_tcs == 4) {
3957                         if (dcb_config->vt_mode)
3958                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3959                                         IXGBE_MRQC_VMDQRT4TCEN;
3960                         else {
3961                 /* Whether the mode is DCB or DCB_RSS, just
3962                  * set MRQE to RSSXTCEN; RSS itself is controlled
3963                  * by the RSS_FIELD bits.
3964                  */
3965                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3966                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3967                                         IXGBE_MRQC_RTRSS4TCEN;
3968                         }
3969                 }
3970                 if (dcb_config->num_tcs.pg_tcs == 8) {
3971                         if (dcb_config->vt_mode)
3972                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3973                                         IXGBE_MRQC_VMDQRT8TCEN;
3974                         else {
3975                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3976                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3977                                         IXGBE_MRQC_RTRSS8TCEN;
3978                         }
3979                 }
3980
3981                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3982
3983                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3984                         /* Disable drop for all queues in VMDQ mode*/
3985                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3986                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3987                                                 (IXGBE_QDE_WRITE |
3988                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3989                 } else {
3990                         /* Enable drop for all queues in SRIOV mode */
3991                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3992                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3993                                                 (IXGBE_QDE_WRITE |
3994                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3995                                                  IXGBE_QDE_ENABLE));
3996                 }
3997         }
3998
3999         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4000         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4001         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4002         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4003
4004         /* VFTA - enable all vlan filters */
4005         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4006                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4007         }
4008
4009         /*
4010          * Configure Rx packet plane (recycle mode; WSP) and
4011          * enable arbiter
4012          */
4013         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4014         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4015 }
4016
4017 static void
4018 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4019                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4020 {
4021         switch (hw->mac.type) {
4022         case ixgbe_mac_82598EB:
4023                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4024                 break;
4025         case ixgbe_mac_82599EB:
4026         case ixgbe_mac_X540:
4027         case ixgbe_mac_X550:
4028         case ixgbe_mac_X550EM_x:
4029         case ixgbe_mac_X550EM_a:
4030                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4031                                                   tsa, map);
4032                 break;
4033         default:
4034                 break;
4035         }
4036 }
4037
4038 static void
4039 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4040                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4041 {
4042         switch (hw->mac.type) {
4043         case ixgbe_mac_82598EB:
4044                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4045                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4046                 break;
4047         case ixgbe_mac_82599EB:
4048         case ixgbe_mac_X540:
4049         case ixgbe_mac_X550:
4050         case ixgbe_mac_X550EM_x:
4051         case ixgbe_mac_X550EM_a:
4052                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4053                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4054                 break;
4055         default:
4056                 break;
4057         }
4058 }
4059
4060 #define DCB_RX_CONFIG  1
4061 #define DCB_TX_CONFIG  1
4062 #define DCB_TX_PB      1024
4063 /**
4064  * ixgbe_dcb_hw_configure - Enable DCB and configure
4065  * general DCB in VT mode and non-VT mode parameters
4066  * @dev: pointer to rte_eth_dev structure
4067  * @dcb_config: pointer to ixgbe_dcb_config structure
4068  */
4069 static int
4070 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4071                         struct ixgbe_dcb_config *dcb_config)
4072 {
4073         int     ret = 0;
4074         uint8_t i, pfc_en, nb_tcs;
4075         uint16_t pbsize, rx_buffer_size;
4076         uint8_t config_dcb_rx = 0;
4077         uint8_t config_dcb_tx = 0;
4078         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4079         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4080         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4081         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4082         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4083         struct ixgbe_dcb_tc_config *tc;
4084         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4085                 RTE_ETHER_CRC_LEN;
4086         struct ixgbe_hw *hw =
4087                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4088         struct ixgbe_bw_conf *bw_conf =
4089                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4090
4091         switch (dev->data->dev_conf.rxmode.mq_mode) {
4092         case ETH_MQ_RX_VMDQ_DCB:
4093                 dcb_config->vt_mode = true;
4094                 if (hw->mac.type != ixgbe_mac_82598EB) {
4095                         config_dcb_rx = DCB_RX_CONFIG;
4096                         /*
4097                          * get DCB and VT Rx configuration parameters
4098                          * from rte_eth_conf
4099                          */
4100                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4101                         /*Configure general VMDQ and DCB RX parameters*/
4102                         ixgbe_vmdq_dcb_configure(dev);
4103                 }
4104                 break;
4105         case ETH_MQ_RX_DCB:
4106         case ETH_MQ_RX_DCB_RSS:
4107                 dcb_config->vt_mode = false;
4108                 config_dcb_rx = DCB_RX_CONFIG;
4109                 /* Get DCB Rx configuration parameters from rte_eth_conf */
4110                 ixgbe_dcb_rx_config(dev, dcb_config);
4111                 /*Configure general DCB RX parameters*/
4112                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4113                 break;
4114         default:
4115                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4116                 break;
4117         }
4118         switch (dev->data->dev_conf.txmode.mq_mode) {
4119         case ETH_MQ_TX_VMDQ_DCB:
4120                 dcb_config->vt_mode = true;
4121                 config_dcb_tx = DCB_TX_CONFIG;
4122                 /* get DCB and VT TX configuration parameters
4123                  * from rte_eth_conf
4124                  */
4125                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4126                 /*Configure general VMDQ and DCB TX parameters*/
4127                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4128                 break;
4129
4130         case ETH_MQ_TX_DCB:
4131                 dcb_config->vt_mode = false;
4132                 config_dcb_tx = DCB_TX_CONFIG;
4133                 /*get DCB TX configuration parameters from rte_eth_conf*/
4134                 ixgbe_dcb_tx_config(dev, dcb_config);
4135                 /*Configure general DCB TX parameters*/
4136                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4137                 break;
4138         default:
4139                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4140                 break;
4141         }
4142
4143         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4144         /* Unpack map */
4145         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4146         if (nb_tcs == ETH_4_TCS) {
4147                 /* Avoid un-configured priority mapping to TC0 */
4148                 uint8_t j = 4;
4149                 uint8_t mask = 0xFF;
4150
4151                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4152                         mask = (uint8_t)(mask & (~(1 << map[i])));
4153                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4154                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4155                                 map[j++] = i;
4156                         mask >>= 1;
4157                 }
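                /*
                 * Worked example of the remap above (mapping assumed): if
                 * user priorities 0-3 were mapped to TCs 0-3, mask ends up
                 * as 0xF0, so the otherwise unmapped priorities 4-7 are
                 * pointed at the unused TC indices 4-7 instead of all
                 * falling back to TC0.
                 */
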
4158                 /* Re-configure 4 TCs BW */
4159                 for (i = 0; i < nb_tcs; i++) {
4160                         tc = &dcb_config->tc_config[i];
4161                         if (bw_conf->tc_num != nb_tcs)
4162                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4163                                         (uint8_t)(100 / nb_tcs);
4164                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4165                                                 (uint8_t)(100 / nb_tcs);
4166                 }
4167                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4168                         tc = &dcb_config->tc_config[i];
4169                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4170                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4171                 }
4172         } else {
4173                 /* Re-configure 8 TCs BW */
4174                 for (i = 0; i < nb_tcs; i++) {
4175                         tc = &dcb_config->tc_config[i];
4176                         if (bw_conf->tc_num != nb_tcs)
4177                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4178                                         (uint8_t)(100 / nb_tcs + (i & 1));
4179                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4180                                 (uint8_t)(100 / nb_tcs + (i & 1));
4181                 }
4182         }
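        /*
         * Note on the arithmetic above: for 8 TCs, 100 / 8 = 12 with a
         * remainder, so the (i & 1) term alternates the per-TC share
         * between 12% and 13%, which sums to exactly 100% across the
         * 8 TCs.
         */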
4183
4184         switch (hw->mac.type) {
4185         case ixgbe_mac_X550:
4186         case ixgbe_mac_X550EM_x:
4187         case ixgbe_mac_X550EM_a:
4188                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4189                 break;
4190         default:
4191                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4192                 break;
4193         }
4194
4195         if (config_dcb_rx) {
4196                 /* Set RX buffer size */
4197                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4198                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4199
4200                 for (i = 0; i < nb_tcs; i++) {
4201                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4202                 }
4203                 /* zero alloc all unused TCs */
4204                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4205                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4206                 }
4207         }
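        /*
         * For illustration of the Rx split above (values assumed): with a
         * 512 KB Rx packet buffer and 8 TCs, each enabled TC gets
         * pbsize = 64 KB programmed into its RXPBSIZE register, and the
         * remaining registers are zeroed so unused TCs get no buffer.
         */
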
4208         if (config_dcb_tx) {
4209                 /* Only an equally distributed Tx packet buffer
4210                  * strategy is supported.
4211                  */
4212                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4213                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4214
4215                 for (i = 0; i < nb_tcs; i++) {
4216                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4217                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4218                 }
4219                 /* Clear unused TCs, if any, to zero buffer size*/
4220                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4221                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4222                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4223                 }
4224         }
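        /*
         * Similarly for the Tx split above (values assumed): a 160 KB Tx
         * packet buffer shared by 8 TCs gives txpktsize = 20 KB per TC,
         * and TXPBTHRESH is derived from it in DCB_TX_PB (1 KB) units
         * minus IXGBE_TXPKT_SIZE_MAX.
         */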
4225
4226         /* Calculate traffic class credits */
4227         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4228                                 IXGBE_DCB_TX_CONFIG);
4229         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4230                                 IXGBE_DCB_RX_CONFIG);
4231
4232         if (config_dcb_rx) {
4233                 /* Unpack CEE standard containers */
4234                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4235                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4236                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4237                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4238                 /* Configure PG(ETS) RX */
4239                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4240         }
4241
4242         if (config_dcb_tx) {
4243                 /* Unpack CEE standard containers */
4244                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4245                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4246                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4247                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4248                 /* Configure PG(ETS) TX */
4249                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4250         }
4251
4252         /*Configure queue statistics registers*/
4253         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4254
4255         /* Check if the PFC is supported */
4256         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4257                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4258                 for (i = 0; i < nb_tcs; i++) {
4259                         /*
4260                          * high_water is 3/4 and low_water is 1/4 of the per-TC buffer;
4261                          * e.g. the defaults of 48 and 16 when the TC count is 8.
4262                          */
4263                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4264                         hw->fc.low_water[i] = pbsize / 4;
4265                         /* Enable pfc for this TC */
4266                         tc = &dcb_config->tc_config[i];
4267                         tc->pfc = ixgbe_dcb_pfc_enabled;
4268                 }
4269                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4270                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4271                         pfc_en &= 0x0F;
4272                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4273         }
4274
4275         return ret;
4276 }
4277
4278 /**
4279  * ixgbe_configure_dcb - Configure DCB  Hardware
4280  * @dev: pointer to rte_eth_dev
4281  */
4282 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4283 {
4284         struct ixgbe_dcb_config *dcb_cfg =
4285                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4286         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4287
4288         PMD_INIT_FUNC_TRACE();
4289
4290         /* check support mq_mode for DCB */
4291         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4292             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4293             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4294                 return;
4295
4296         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4297                 return;
4298
4299         /* Configure DCB hardware */
4300         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4301 }
4302
4303 /*
4304  * VMDq-only (ETH_MQ_RX_VMDQ_ONLY) Rx configuration; 10 GbE NICs only.
4305  */
4306 static void
4307 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4308 {
4309         struct rte_eth_vmdq_rx_conf *cfg;
4310         struct ixgbe_hw *hw;
4311         enum rte_eth_nb_pools num_pools;
4312         uint32_t mrqc, vt_ctl, vlanctrl;
4313         uint32_t vmolr = 0;
4314         int i;
4315
4316         PMD_INIT_FUNC_TRACE();
4317         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4318         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4319         num_pools = cfg->nb_queue_pools;
4320
4321         ixgbe_rss_disable(dev);
4322
4323         /* MRQC: enable vmdq */
4324         mrqc = IXGBE_MRQC_VMDQEN;
4325         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4326
4327         /* PFVTCTL: turn on virtualisation and set the default pool */
4328         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4329         if (cfg->enable_default_pool)
4330                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4331         else
4332                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4333
4334         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4335
4336         for (i = 0; i < (int)num_pools; i++) {
4337                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4338                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4339         }
4340
4341         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4342         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4343         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4344         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4345
4346         /* VFTA - enable all vlan filters */
4347         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4348                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4349
4350         /* VFRE: pool enabling for receive - 64 */
4351         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4352         if (num_pools == ETH_64_POOLS)
4353                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4354
4355         /*
4356          * MPSAR - allow pools to read specific mac addresses
4357          * In this case, all pools should be able to read from mac addr 0
4358          */
4359         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4360         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4361
4362         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4363         for (i = 0; i < cfg->nb_pool_maps; i++) {
4364                 /* set vlan id in VF register and set the valid bit */
4365                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4366                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4367                 /*
4368                  * Put the allowed pools in the VFB reg. With up to 64 pools
4369                  * the bitmap spans two 32-bit registers, so write whichever
4370                  * half actually carries the configured pools (see below).
4371                  */
4372                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4373                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4374                                         (cfg->pool_map[i].pools & UINT32_MAX));
4375                 else
4376                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4377                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4378
4379         }
4380
4381         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4382         if (cfg->enable_loop_back) {
4383                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4384                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4385                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4386         }
4387
4388         IXGBE_WRITE_FLUSH(hw);
4389 }
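
/*
 * Illustrative application-side setup consumed by the function above
 * (assumed usage; the field names are the ones referenced in this file):
 *
 *        struct rte_eth_conf conf = { 0 };
 *
 *        conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *        conf.rx_adv_conf.vmdq_rx_conf.nb_queue_pools = ETH_64_POOLS;
 *        conf.rx_adv_conf.vmdq_rx_conf.enable_default_pool = 0;
 *        conf.rx_adv_conf.vmdq_rx_conf.nb_pool_maps = 1;
 *        conf.rx_adv_conf.vmdq_rx_conf.pool_map[0].vlan_id = 100;
 *        conf.rx_adv_conf.vmdq_rx_conf.pool_map[0].pools = 1ULL << 0;
 *
 * i.e. traffic tagged with VLAN 100 is steered to pool 0 through the
 * PFVLVF/PFVLVFB writes above.
 */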
4390
4391 /*
4392  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4393  * @hw: pointer to hardware structure
4394  */
4395 static void
4396 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4397 {
4398         uint32_t reg;
4399         uint32_t q;
4400
4401         PMD_INIT_FUNC_TRACE();
4402         /*PF VF Transmit Enable*/
4403         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4404         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4405
4406         /* Disable the Tx desc arbiter so that MTQC can be changed */
4407         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4408         reg |= IXGBE_RTTDCS_ARBDIS;
4409         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4410
4411         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4412         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4413
4414         /* Disable drop for all queues */
4415         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4416                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4417                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4418
4419         /* Enable the Tx desc arbiter */
4420         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4421         reg &= ~IXGBE_RTTDCS_ARBDIS;
4422         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4423
4424         IXGBE_WRITE_FLUSH(hw);
4425 }
4426
4427 static int __attribute__((cold))
4428 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4429 {
4430         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4431         uint64_t dma_addr;
4432         unsigned int i;
4433
4434         /* Initialize software ring entries */
4435         for (i = 0; i < rxq->nb_rx_desc; i++) {
4436                 volatile union ixgbe_adv_rx_desc *rxd;
4437                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4438
4439                 if (mbuf == NULL) {
4440                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4441                                      (unsigned) rxq->queue_id);
4442                         return -ENOMEM;
4443                 }
4444
4445                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4446                 mbuf->port = rxq->port_id;
4447
4448                 dma_addr =
4449                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4450                 rxd = &rxq->rx_ring[i];
4451                 rxd->read.hdr_addr = 0;
4452                 rxd->read.pkt_addr = dma_addr;
4453                 rxe[i].mbuf = mbuf;
4454         }
4455
4456         return 0;
4457 }
4458
4459 static int
4460 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4461 {
4462         struct ixgbe_hw *hw;
4463         uint32_t mrqc;
4464
4465         ixgbe_rss_configure(dev);
4466
4467         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4468
4469         /* MRQC: enable VF RSS */
4470         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4471         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4472         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4473         case ETH_64_POOLS:
4474                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4475                 break;
4476
4477         case ETH_32_POOLS:
4478                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4479                 break;
4480
4481         default:
4482                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4483                 return -EINVAL;
4484         }
4485
4486         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4487
4488         return 0;
4489 }
4490
4491 static int
4492 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4493 {
4494         struct ixgbe_hw *hw =
4495                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4496
4497         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4498         case ETH_64_POOLS:
4499                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4500                         IXGBE_MRQC_VMDQEN);
4501                 break;
4502
4503         case ETH_32_POOLS:
4504                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4505                         IXGBE_MRQC_VMDQRT4TCEN);
4506                 break;
4507
4508         case ETH_16_POOLS:
4509                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4510                         IXGBE_MRQC_VMDQRT8TCEN);
4511                 break;
4512         default:
4513                 PMD_INIT_LOG(ERR,
4514                         "invalid pool number in IOV mode");
4515                 break;
4516         }
4517         return 0;
4518 }
4519
4520 static int
4521 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4522 {
4523         struct ixgbe_hw *hw =
4524                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4525
4526         if (hw->mac.type == ixgbe_mac_82598EB)
4527                 return 0;
4528
4529         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4530                 /*
4531                  * SRIOV inactive scheme
4532                  * any DCB/RSS w/o VMDq multi-queue setting
4533                  */
4534                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4535                 case ETH_MQ_RX_RSS:
4536                 case ETH_MQ_RX_DCB_RSS:
4537                 case ETH_MQ_RX_VMDQ_RSS:
4538                         ixgbe_rss_configure(dev);
4539                         break;
4540
4541                 case ETH_MQ_RX_VMDQ_DCB:
4542                         ixgbe_vmdq_dcb_configure(dev);
4543                         break;
4544
4545                 case ETH_MQ_RX_VMDQ_ONLY:
4546                         ixgbe_vmdq_rx_hw_configure(dev);
4547                         break;
4548
4549                 case ETH_MQ_RX_NONE:
4550                 default:
4551                         /* if mq_mode is none, disable rss mode.*/
4552                         ixgbe_rss_disable(dev);
4553                         break;
4554                 }
4555         } else {
4556                 /* SRIOV active scheme
4557                  * Support RSS together with SRIOV.
4558                  */
4559                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4560                 case ETH_MQ_RX_RSS:
4561                 case ETH_MQ_RX_VMDQ_RSS:
4562                         ixgbe_config_vf_rss(dev);
4563                         break;
4564                 case ETH_MQ_RX_VMDQ_DCB:
4565                 case ETH_MQ_RX_DCB:
4566                 /* In SRIOV, the configuration is the same as VMDq case */
4567                         ixgbe_vmdq_dcb_configure(dev);
4568                         break;
4569                 /* DCB/RSS together with SRIOV is not supported */
4570                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4571                 case ETH_MQ_RX_DCB_RSS:
4572                         PMD_INIT_LOG(ERR,
4573                                 "Could not support DCB/RSS with VMDq & SRIOV");
4574                         return -1;
4575                 default:
4576                         ixgbe_config_vf_default(dev);
4577                         break;
4578                 }
4579         }
4580
4581         return 0;
4582 }
4583
4584 static int
4585 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4586 {
4587         struct ixgbe_hw *hw =
4588                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4589         uint32_t mtqc;
4590         uint32_t rttdcs;
4591
4592         if (hw->mac.type == ixgbe_mac_82598EB)
4593                 return 0;
4594
4595         /* disable arbiter before setting MTQC */
4596         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4597         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4598         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4599
4600         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4601                 /*
4602                  * SRIOV inactive scheme
4603                  * any DCB w/o VMDq multi-queue setting
4604                  */
4605                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4606                         ixgbe_vmdq_tx_hw_configure(hw);
4607                 else {
4608                         mtqc = IXGBE_MTQC_64Q_1PB;
4609                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4610                 }
4611         } else {
4612                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4613
4614                 /*
4615                  * SRIOV active scheme
4616                  * FIXME if support DCB together with VMDq & SRIOV
4617                  */
4618                 case ETH_64_POOLS:
4619                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4620                         break;
4621                 case ETH_32_POOLS:
4622                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4623                         break;
4624                 case ETH_16_POOLS:
4625                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4626                                 IXGBE_MTQC_8TC_8TQ;
4627                         break;
4628                 default:
4629                         mtqc = IXGBE_MTQC_64Q_1PB;
4630                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4631                 }
4632                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4633         }
4634
4635         /* re-enable arbiter */
4636         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4637         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4638
4639         return 0;
4640 }
4641
4642 /**
4643  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4644  *
4645  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4646  * spec rev. 3.0 chapter 8.2.3.8.13.
4647  *
4648  * @pool Memory pool of the Rx queue
4649  */
4650 static inline uint32_t
4651 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4652 {
4653         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4654
4655         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4656         uint16_t maxdesc =
4657                 RTE_IPV4_MAX_PKT_LEN /
4658                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4659
4660         if (maxdesc >= 16)
4661                 return IXGBE_RSCCTL_MAXDESC_16;
4662         else if (maxdesc >= 8)
4663                 return IXGBE_RSCCTL_MAXDESC_8;
4664         else if (maxdesc >= 4)
4665                 return IXGBE_RSCCTL_MAXDESC_4;
4666         else
4667                 return IXGBE_RSCCTL_MAXDESC_1;
4668 }
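
/*
 * Worked example (assuming the common 2048-byte mbuf data room and the
 * default 128-byte headroom): each receive buffer holds 1920 bytes, so
 * maxdesc = 65535 / 1920 = 34, which the first branch above caps at
 * IXGBE_RSCCTL_MAXDESC_16.
 */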
4669
4670 /**
4671  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4672  * interrupt
4673  *
4674  * (Taken from FreeBSD tree)
4675  * (yes this is all very magic and confusing :)
4676  *
4677  * @dev port handle
4678  * @entry the register array entry
4679  * @vector the MSIX vector for this queue
4680  * @type RX/TX/MISC
4681  */
4682 static void
4683 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4684 {
4685         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4686         u32 ivar, index;
4687
4688         vector |= IXGBE_IVAR_ALLOC_VAL;
4689
4690         switch (hw->mac.type) {
4691
4692         case ixgbe_mac_82598EB:
4693                 if (type == -1)
4694                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4695                 else
4696                         entry += (type * 64);
4697                 index = (entry >> 2) & 0x1F;
4698                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4699                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4700                 ivar |= (vector << (8 * (entry & 0x3)));
4701                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4702                 break;
4703
4704         case ixgbe_mac_82599EB:
4705         case ixgbe_mac_X540:
4706                 if (type == -1) { /* MISC IVAR */
4707                         index = (entry & 1) * 8;
4708                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4709                         ivar &= ~(0xFF << index);
4710                         ivar |= (vector << index);
4711                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4712                 } else {        /* RX/TX IVARS */
4713                         index = (16 * (entry & 1)) + (8 * type);
4714                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4715                         ivar &= ~(0xFF << index);
4716                         ivar |= (vector << index);
4717                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4718                 }
4719
4720                 break;
4721
4722         default:
4723                 break;
4724         }
4725 }
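
/*
 * Example of the 82599/X540 index math above (values assumed): for an Rx
 * queue (type 0) with entry 5 and vector 3, index = 16 * (5 & 1) = 16, so
 * vector 3 (with IXGBE_IVAR_ALLOC_VAL set) lands in bits 23:16 of
 * IVAR(5 >> 1) = IVAR(2).
 */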
4726
4727 void __attribute__((cold))
4728 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4729 {
4730         uint16_t i, rx_using_sse;
4731         struct ixgbe_adapter *adapter = dev->data->dev_private;
4732
4733         /*
4734          * Vector Rx can be used only if a few configuration conditions
4735          * are met and Rx Bulk Allocation is allowed.
4736          */
4737         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4738             !adapter->rx_bulk_alloc_allowed) {
4739                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4740                                     "preconditions",
4741                              dev->data->port_id);
4742
4743                 adapter->rx_vec_allowed = false;
4744         }
4745
4746         /*
4747          * Initialize the appropriate LRO callback.
4748          *
4749          * If all queues satisfy the bulk allocation preconditions
4750          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4751          * Otherwise use a single allocation version.
4752          */
4753         if (dev->data->lro) {
4754                 if (adapter->rx_bulk_alloc_allowed) {
4755                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4756                                            "allocation version");
4757                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4758                 } else {
4759                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4760                                            "allocation version");
4761                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4762                 }
4763         } else if (dev->data->scattered_rx) {
4764                 /*
4765                  * Set the non-LRO scattered callback: there are Vector and
4766                  * single allocation versions.
4767                  */
4768                 if (adapter->rx_vec_allowed) {
4769                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4770                                             "callback (port=%d).",
4771                                      dev->data->port_id);
4772
4773                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4774                 } else if (adapter->rx_bulk_alloc_allowed) {
4775                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4776                                            "allocation callback (port=%d).",
4777                                      dev->data->port_id);
4778                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4779                 } else {
4780                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4781                                             "single allocation) "
4782                                             "Scattered Rx callback "
4783                                             "(port=%d).",
4784                                      dev->data->port_id);
4785
4786                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4787                 }
4788         /*
4789          * Below we set "simple" callbacks according to port/queue parameters.
4790          * If the parameters allow it, we choose between the following
4791          * callbacks:
4792          *    - Vector
4793          *    - Bulk Allocation
4794          *    - Single buffer allocation (the simplest one)
4795          */
4796         } else if (adapter->rx_vec_allowed) {
4797                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4798                                     "burst size no less than %d (port=%d).",
4799                              RTE_IXGBE_DESCS_PER_LOOP,
4800                              dev->data->port_id);
4801
4802                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4803         } else if (adapter->rx_bulk_alloc_allowed) {
4804                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4805                                     "satisfied. Rx Burst Bulk Alloc function "
4806                                     "will be used on port=%d.",
4807                              dev->data->port_id);
4808
4809                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4810         } else {
4811                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4812                                     "satisfied, or Scattered Rx is requested "
4813                                     "(port=%d).",
4814                              dev->data->port_id);
4815
4816                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4817         }
4818
4819         /* Propagate information about RX function choice through all queues. */
4820
4821         rx_using_sse =
4822                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4823                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4824
4825         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4826                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4827
4828                 rxq->rx_using_sse = rx_using_sse;
4829 #ifdef RTE_LIBRTE_SECURITY
4830                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4831                                 DEV_RX_OFFLOAD_SECURITY);
4832 #endif
4833         }
4834 }
4835
4836 /**
4837  * ixgbe_set_rsc - configure RSC related port HW registers
4838  *
4839  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4840  * of 82599 Spec (x540 configuration is virtually the same).
4841  *
4842  * @dev port handle
4843  *
4844  * Returns 0 in case of success or a non-zero error code
4845  */
4846 static int
4847 ixgbe_set_rsc(struct rte_eth_dev *dev)
4848 {
4849         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4850         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4851         struct rte_eth_dev_info dev_info = { 0 };
4852         bool rsc_capable = false;
4853         uint16_t i;
4854         uint32_t rdrxctl;
4855         uint32_t rfctl;
4856
4857         /* Sanity check */
4858         dev->dev_ops->dev_infos_get(dev, &dev_info);
4859         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4860                 rsc_capable = true;
4861
4862         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4863                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4864                                    "support it");
4865                 return -EINVAL;
4866         }
4867
4868         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4869
4870         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4871              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4872                 /*
4873                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4874                  * RSC configuration requires HW CRC stripping to be
4875                  * enabled. If the user requested both HW CRC stripping off
4876                  * and RSC on - return an error.
4877                  */
4878                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4879                                     "is disabled");
4880                 return -EINVAL;
4881         }
4882
4883         /* RFCTL configuration  */
4884         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4885         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4886                 /*
4887                  * Since NFS packet coalescing is not supported, clear
4888                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4889                  * enabled.
4890                  */
4891                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4892                            IXGBE_RFCTL_NFSR_DIS);
4893         else
4894                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4895         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4896
4897         /* If LRO hasn't been requested - we are done here. */
4898         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4899                 return 0;
4900
4901         /* Set RDRXCTL.RSCACKC bit */
4902         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4903         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4904         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4905
4906         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4907         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4908                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4909                 uint32_t srrctl =
4910                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4911                 uint32_t rscctl =
4912                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4913                 uint32_t psrtype =
4914                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4915                 uint32_t eitr =
4916                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4917
4918                 /*
4919                  * ixgbe PMD doesn't support header-split at the moment.
4920                  *
4921                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4922                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4923                  * should be configured even if header split is not
4924                  * enabled. We configure it to 128 bytes following the
4925                  * recommendation in the spec.
4926                  */
4927                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4928                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4929                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4930
4931                 /*
4932                  * TODO: Consider setting the Receive Descriptor Minimum
4933                  * Threshold Size for the RSC case. This is not an obviously
4934                  * beneficial option, but one worth considering...
4935                  */
4936
4937                 rscctl |= IXGBE_RSCCTL_RSCEN;
4938                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4939                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4940
4941                 /*
4942                  * RSC: Set ITR interval corresponding to 2K ints/s.
4943                  *
4944                  * Full-sized RSC aggregations for a 10Gb/s link will
4945                  * arrive at about 20K aggregation/s rate.
4946                  *
4947                  * A 2K ints/s rate will cause only about 10% of the
4948                  * aggregations to be closed due to interrupt timer
4949                  * expiration when streaming at wire speed.
4950                  *
4951                  * For a sparse streaming case this setting will yield
4952                  * at most 500us latency for a single RSC aggregation.
4953                  */
4954                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4955                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4956                 eitr |= IXGBE_EITR_CNT_WDIS;
4957
4958                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4959                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4960                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4961                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4962
4963                 /*
4964                  * RSC requires the mapping of the queue to the
4965                  * interrupt vector.
4966                  */
4967                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4968         }
4969
4970         dev->data->lro = 1;
4971
4972         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4973
4974         return 0;
4975 }
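
/*
 * Illustrative usage (assumed application-side code): LRO/RSC is requested
 * through the Rx offload flags before configuration and, per the checks in
 * ixgbe_set_rsc(), HW CRC stripping must remain enabled:
 *
 *        conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
 *        conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC;
 */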
4976
4977 /*
4978  * Initializes Receive Unit.
4979  */
4980 int __attribute__((cold))
4981 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4982 {
4983         struct ixgbe_hw     *hw;
4984         struct ixgbe_rx_queue *rxq;
4985         uint64_t bus_addr;
4986         uint32_t rxctrl;
4987         uint32_t fctrl;
4988         uint32_t hlreg0;
4989         uint32_t maxfrs;
4990         uint32_t srrctl;
4991         uint32_t rdrxctl;
4992         uint32_t rxcsum;
4993         uint16_t buf_size;
4994         uint16_t i;
4995         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4996         int rc;
4997
4998         PMD_INIT_FUNC_TRACE();
4999         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5000
5001         /*
5002          * Make sure receives are disabled while setting
5003          * up the RX context (registers, descriptor rings, etc.).
5004          */
5005         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5006         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5007
5008         /* Enable receipt of broadcast frames */
5009         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5010         fctrl |= IXGBE_FCTRL_BAM;
5011         fctrl |= IXGBE_FCTRL_DPF;
5012         fctrl |= IXGBE_FCTRL_PMCF;
5013         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5014
5015         /*
5016          * Configure CRC stripping, if any.
5017          */
5018         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5019         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5020                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5021         else
5022                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5023
5024         /*
5025          * Configure jumbo frame support, if any.
5026          */
5027         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5028                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5029                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5030                 maxfrs &= 0x0000FFFF;
5031                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5032                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5033         } else
5034                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
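        /*
         * For example (value assumed): with max_rx_pkt_len = 9018 the jumbo
         * branch above programs 9018 into the MAXFRS field in the upper
         * 16 bits of the register while preserving the lower 16 bits.
         */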
5035
5036         /*
5037          * If loopback mode is configured, set LPBK bit.
5038          */
5039         if (dev->data->dev_conf.lpbk_mode != 0) {
5040                 rc = ixgbe_check_supported_loopback_mode(dev);
5041                 if (rc < 0) {
5042                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5043                         return rc;
5044                 }
5045                 hlreg0 |= IXGBE_HLREG0_LPBK;
5046         } else {
5047                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5048         }
5049
5050         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5051
5052         /*
5053          * Assume no header split and no VLAN strip support
5054          * on any Rx queue first.
5055          */
5056         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5057         /* Setup RX queues */
5058         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5059                 rxq = dev->data->rx_queues[i];
5060
5061                 /*
5062                  * Reset crc_len in case it was changed after queue setup by a
5063                  * call to configure.
5064                  */
5065                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5066                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5067                 else
5068                         rxq->crc_len = 0;
5069
5070                 /* Setup the Base and Length of the Rx Descriptor Rings */
5071                 bus_addr = rxq->rx_ring_phys_addr;
5072                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5073                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5074                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5075                                 (uint32_t)(bus_addr >> 32));
5076                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5077                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5078                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5079                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5080
5081                 /* Configure the SRRCTL register */
5082                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5083
5084                 /* Set if packets are dropped when no descriptors available */
5085                 if (rxq->drop_en)
5086                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5087
5088                 /*
5089                  * Configure the RX buffer size in the BSIZEPACKET field of
5090                  * the SRRCTL register of the queue.
5091                  * The value is in 1 KB resolution. Valid values can be from
5092                  * 1 KB to 16 KB.
5093                  */
5094                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5095                         RTE_PKTMBUF_HEADROOM);
5096                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5097                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5098
5099                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5100
5101                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5102                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5103
5104                 /* Add double VLAN tag length when checking if scattered Rx is needed */
5105                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5106                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5107                         dev->data->scattered_rx = 1;
5108                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5109                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5110         }
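        /*
         * Worked example of the buffer-size check above (typical values
         * assumed): a 2048-byte mbuf data room minus 128 bytes of headroom
         * leaves 1920 bytes, which BSIZEPKT rounds down to 1 KB; with the
         * default max_rx_pkt_len of 1518, 1518 + 8 > 1024, so scattered Rx
         * gets enabled.
         */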
5111
5112         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5113                 dev->data->scattered_rx = 1;
5114
5115         /*
5116          * Device configured with multiple RX queues.
5117          */
5118         ixgbe_dev_mq_rx_configure(dev);
5119
5120         /*
5121          * Setup the Checksum Register.
5122          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5123          * Enable IP/L4 checksum computation by hardware if requested to do so.
5124          */
5125         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5126         rxcsum |= IXGBE_RXCSUM_PCSD;
5127         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5128                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5129         else
5130                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5131
5132         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5133
5134         if (hw->mac.type == ixgbe_mac_82599EB ||
5135             hw->mac.type == ixgbe_mac_X540) {
5136                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5137                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5138                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5139                 else
5140                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
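                /*
                 * The RSCFRSTSIZE field is reserved on these MACs and, per the
                 * 82599/X540 datasheets, is expected to be cleared by software.
                 */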
5141                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5142                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5143         }
5144
5145         rc = ixgbe_set_rsc(dev);
5146         if (rc)
5147                 return rc;
5148
5149         ixgbe_set_rx_function(dev);
5150
5151         return 0;
5152 }
5153
5154 /*
5155  * Initializes Transmit Unit.
5156  */
5157 void __attribute__((cold))
5158 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5159 {
5160         struct ixgbe_hw     *hw;
5161         struct ixgbe_tx_queue *txq;
5162         uint64_t bus_addr;
5163         uint32_t hlreg0;
5164         uint32_t txctrl;
5165         uint16_t i;
5166
5167         PMD_INIT_FUNC_TRACE();
5168         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5169
5170         /* Enable TX CRC (checksum offload requirement) and hw padding
5171          * (TSO requirement)
5172          */
5173         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5174         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5175         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5176
5177         /* Setup the Base and Length of the Tx Descriptor Rings */
5178         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5179                 txq = dev->data->tx_queues[i];
5180
5181                 bus_addr = txq->tx_ring_phys_addr;
5182                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5183                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5184                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5185                                 (uint32_t)(bus_addr >> 32));
5186                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5187                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5188                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5189                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5190                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5191
5192                 /*
5193                  * Disable Tx Head Writeback RO bit, since this hoses
5194                  * bookkeeping if things aren't delivered in order.
5195                  */
5196                 switch (hw->mac.type) {
5197                 case ixgbe_mac_82598EB:
5198                         txctrl = IXGBE_READ_REG(hw,
5199                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5200                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5201                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5202                                         txctrl);
5203                         break;
5204
5205                 case ixgbe_mac_82599EB:
5206                 case ixgbe_mac_X540:
5207                 case ixgbe_mac_X550:
5208                 case ixgbe_mac_X550EM_x:
5209                 case ixgbe_mac_X550EM_a:
5210                 default:
5211                         txctrl = IXGBE_READ_REG(hw,
5212                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5213                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5214                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5215                                         txctrl);
5216                         break;
5217                 }
5218         }
5219
5220         /* Device configured with multiple TX queues. */
5221         ixgbe_dev_mq_tx_configure(dev);
5222 }
5223
5224 /*
5225  * Check if requested loopback mode is supported
5226  */
5227 int
5228 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5229 {
5230         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5231
5232         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5233                 if (hw->mac.type == ixgbe_mac_82599EB ||
5234                      hw->mac.type == ixgbe_mac_X540 ||
5235                      hw->mac.type == ixgbe_mac_X550 ||
5236                      hw->mac.type == ixgbe_mac_X550EM_x ||
5237                      hw->mac.type == ixgbe_mac_X550EM_a)
5238                         return 0;
5239
5240         return -ENOTSUP;
5241 }
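
/*
 * Illustrative usage only (not part of the driver): Tx->Rx loopback is
 * requested through the port configuration passed to rte_eth_dev_configure()
 * before the port is started, e.g.
 *
 *     struct rte_eth_conf conf = { .lpbk_mode = IXGBE_LPBK_TX_RX };
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * port_id, nb_rxq and nb_txq above are application-chosen placeholders.
 */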
5242
5243 /*
5244  * Set up link for 82599 loopback mode Tx->Rx.
5245  */
5246 static inline void __attribute__((cold))
5247 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5248 {
5249         PMD_INIT_FUNC_TRACE();
5250
5251         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5252                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5253                                 IXGBE_SUCCESS) {
5254                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5255                         /* ignore error */
5256                         return;
5257                 }
5258         }
5259
5260         /* Restart link */
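        /*
         * AUTOC is programmed to force link up (FLU) in 10G mode with
         * auto-negotiation disabled, so the Tx->Rx loopback link comes up
         * without a link partner.
         */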
5261         IXGBE_WRITE_REG(hw,
5262                         IXGBE_AUTOC,
5263                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5264         ixgbe_reset_pipeline_82599(hw);
5265
5266         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5267         msec_delay(50);
5268 }
5269
5270
5271 /*
5272  * Start Transmit and Receive Units.
5273  */
5274 int __attribute__((cold))
5275 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5276 {
5277         struct ixgbe_hw     *hw;
5278         struct ixgbe_tx_queue *txq;
5279         struct ixgbe_rx_queue *rxq;
5280         uint32_t txdctl;
5281         uint32_t dmatxctl;
5282         uint32_t rxctrl;
5283         uint16_t i;
5284         int ret = 0;
5285
5286         PMD_INIT_FUNC_TRACE();
5287         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5288
5289         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5290                 txq = dev->data->tx_queues[i];
5291                 /* Setup Transmit Threshold Registers */
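                /*
                 * PTHRESH lives in bits 6:0, HTHRESH in bits 14:8 and
                 * WTHRESH in bits 22:16 of TXDCTL, matching the masks and
                 * shifts below.
                 */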
5292                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5293                 txdctl |= txq->pthresh & 0x7F;
5294                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5295                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5296                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5297         }
5298
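        /*
         * 82599 and later also need the global Tx DMA enable bit
         * (DMATXCTL.TE) in addition to the per-queue TXDCTL enables,
         * which the 82598 does not use.
         */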
5299         if (hw->mac.type != ixgbe_mac_82598EB) {
5300                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5301                 dmatxctl |= IXGBE_DMATXCTL_TE;
5302                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5303         }
5304
5305         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5306                 txq = dev->data->tx_queues[i];
5307                 if (!txq->tx_deferred_start) {
5308                         ret = ixgbe_dev_tx_queue_start(dev, i);
5309                         if (ret < 0)
5310                                 return ret;
5311                 }
5312         }
5313
5314         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5315                 rxq = dev->data->rx_queues[i];
5316                 if (!rxq->rx_deferred_start) {
5317                         ret = ixgbe_dev_rx_queue_start(dev, i);
5318                         if (ret < 0)
5319                                 return ret;
5320                 }
5321         }
5322
5323         /* Enable Receive engine */
5324         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5325         if (hw->mac.type == ixgbe_mac_82598EB)
5326                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5327         rxctrl |= IXGBE_RXCTRL_RXEN;
5328         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5329
5330         /* If loopback mode is enabled, set up the link accordingly */
5331         if (dev->data->dev_conf.lpbk_mode != 0) {
5332                 if (hw->mac.type == ixgbe_mac_82599EB)
5333                         ixgbe_setup_loopback_link_82599(hw);
5334                 else if (hw->mac.type == ixgbe_mac_X540 ||
5335                      hw->mac.type == ixgbe_mac_X550 ||
5336                      hw->mac.type == ixgbe_mac_X550EM_x ||
5337                      hw->mac.type == ixgbe_mac_X550EM_a)
5338                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5339         }
5340
5341 #ifdef RTE_LIBRTE_SECURITY
5342         if ((dev->data->dev_conf.rxmode.offloads &
5343                         DEV_RX_OFFLOAD_SECURITY) ||
5344                 (dev->data->dev_conf.txmode.offloads &
5345                         DEV_TX_OFFLOAD_SECURITY)) {
5346                 ret = ixgbe_crypto_enable_ipsec(dev);
5347                 if (ret != 0) {
5348                         PMD_DRV_LOG(ERR,
5349                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5350                                     ret);
5351                         return ret;
5352                 }
5353         }
5354 #endif
5355
5356         return 0;
5357 }
5358
5359 /*
5360  * Start Receive Units for specified queue.
5361  */
5362 int __attribute__((cold))
5363 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5364 {
5365         struct ixgbe_hw     *hw;
5366         struct ixgbe_rx_queue *rxq;
5367         uint32_t rxdctl;
5368         int poll_ms;
5369
5370         PMD_INIT_FUNC_TRACE();
5371         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5372
5373         rxq = dev->data->rx_queues[rx_queue_id];
5374
5375         /* Allocate buffers for descriptor rings */
5376         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5377                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5378                              rx_queue_id);
5379                 return -1;
5380         }
5381         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5382         rxdctl |= IXGBE_RXDCTL_ENABLE;
5383         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5384
5385         /* Wait until RX Enable ready */
5386         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5387         do {
5388                 rte_delay_ms(1);
5389                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5390         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5391         if (!poll_ms)
5392                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
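        /* Ensure the descriptor ring writes are visible before the tail update. */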
5393         rte_wmb();
5394         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5395         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5396         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5397
5398         return 0;
5399 }
5400
5401 /*
5402  * Stop Receive Units for specified queue.
5403  */
5404 int __attribute__((cold))
5405 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5406 {
5407         struct ixgbe_hw     *hw;
5408         struct ixgbe_adapter *adapter = dev->data->dev_private;
5409         struct ixgbe_rx_queue *rxq;
5410         uint32_t rxdctl;
5411         int poll_ms;
5412
5413         PMD_INIT_FUNC_TRACE();
5414         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5415
5416         rxq = dev->data->rx_queues[rx_queue_id];
5417
5418         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5419         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5420         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5421
5422         /* Wait until RX Enable bit clear */
5423         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5424         do {
5425                 rte_delay_ms(1);
5426                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5427         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5428         if (!poll_ms)
5429                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5430
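        /*
         * Short settle delay after clearing the enable bit, presumably to let
         * any in-flight DMA complete before the mbufs are freed.
         */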
5431         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5432
5433         ixgbe_rx_queue_release_mbufs(rxq);
5434         ixgbe_reset_rx_queue(adapter, rxq);
5435         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5436
5437         return 0;
5438 }
5439
5440
5441 /*
5442  * Start Transmit Units for specified queue.
5443  */
5444 int __attribute__((cold))
5445 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5446 {
5447         struct ixgbe_hw     *hw;
5448         struct ixgbe_tx_queue *txq;
5449         uint32_t txdctl;
5450         int poll_ms;
5451
5452         PMD_INIT_FUNC_TRACE();
5453         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5454
5455         txq = dev->data->tx_queues[tx_queue_id];
5456         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5457         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5458         txdctl |= IXGBE_TXDCTL_ENABLE;
5459         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5460
5461         /* Wait until TX Enable ready */
5462         if (hw->mac.type == ixgbe_mac_82599EB) {
5463                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5464                 do {
5465                         rte_delay_ms(1);
5466                         txdctl = IXGBE_READ_REG(hw,
5467                                 IXGBE_TXDCTL(txq->reg_idx));
5468                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5469                 if (!poll_ms)
5470                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5471                                 tx_queue_id);
5472         }
5473         rte_wmb();
5474         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5475         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5476
5477         return 0;
5478 }
5479
5480 /*
5481  * Stop Transmit Units for specified queue.
5482  */
5483 int __attribute__((cold))
5484 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5485 {
5486         struct ixgbe_hw     *hw;
5487         struct ixgbe_tx_queue *txq;
5488         uint32_t txdctl;
5489         uint32_t txtdh, txtdt;
5490         int poll_ms;
5491
5492         PMD_INIT_FUNC_TRACE();
5493         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5494
5495         txq = dev->data->tx_queues[tx_queue_id];
5496
5497         /* Wait until TX queue is empty */
5498         if (hw->mac.type == ixgbe_mac_82599EB) {
5499                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5500                 do {
5501                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5502                         txtdh = IXGBE_READ_REG(hw,
5503                                                IXGBE_TDH(txq->reg_idx));
5504                         txtdt = IXGBE_READ_REG(hw,
5505                                                IXGBE_TDT(txq->reg_idx));
5506                 } while (--poll_ms && (txtdh != txtdt));
5507                 if (!poll_ms)
5508                         PMD_INIT_LOG(ERR,
5509                                 "Tx Queue %d is not empty when stopping.",
5510                                 tx_queue_id);
5511         }
5512
5513         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5514         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5515         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5516
5517         /* Wait until TX Enable bit clear */
5518         if (hw->mac.type == ixgbe_mac_82599EB) {
5519                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5520                 do {
5521                         rte_delay_ms(1);
5522                         txdctl = IXGBE_READ_REG(hw,
5523                                                 IXGBE_TXDCTL(txq->reg_idx));
5524                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5525                 if (!poll_ms)
5526                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5527                                 tx_queue_id);
5528         }
5529
5530         if (txq->ops != NULL) {
5531                 txq->ops->release_mbufs(txq);
5532                 txq->ops->reset(txq);
5533         }
5534         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5535
5536         return 0;
5537 }
5538
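/*
 * These two helpers back the .rxq_info_get/.txq_info_get dev ops reached
 * through rte_eth_rx_queue_info_get()/rte_eth_tx_queue_info_get(); they only
 * report the software view of the queue, no device registers are touched.
 */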
5539 void
5540 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5541         struct rte_eth_rxq_info *qinfo)
5542 {
5543         struct ixgbe_rx_queue *rxq;
5544
5545         rxq = dev->data->rx_queues[queue_id];
5546
5547         qinfo->mp = rxq->mb_pool;
5548         qinfo->scattered_rx = dev->data->scattered_rx;
5549         qinfo->nb_desc = rxq->nb_rx_desc;
5550
5551         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5552         qinfo->conf.rx_drop_en = rxq->drop_en;
5553         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5554         qinfo->conf.offloads = rxq->offloads;
5555 }
5556
5557 void
5558 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5559         struct rte_eth_txq_info *qinfo)
5560 {
5561         struct ixgbe_tx_queue *txq;
5562
5563         txq = dev->data->tx_queues[queue_id];
5564
5565         qinfo->nb_desc = txq->nb_tx_desc;
5566
5567         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5568         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5569         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5570
5571         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5572         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5573         qinfo->conf.offloads = txq->offloads;
5574         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5575 }
5576
5577 /*
5578  * [VF] Initializes Receive Unit.
5579  */
5580 int __attribute__((cold))
5581 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5582 {
5583         struct ixgbe_hw     *hw;
5584         struct ixgbe_rx_queue *rxq;
5585         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5586         uint64_t bus_addr;
5587         uint32_t srrctl, psrtype = 0;
5588         uint16_t buf_size;
5589         uint16_t i;
5590         int ret;
5591
5592         PMD_INIT_FUNC_TRACE();
5593         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5594
5595         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5596                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5597                         "it must be a power of 2");
5598                 return -1;
5599         }
5600
5601         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5602                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5603                         "it must be less than or equal to %d",
5604                         hw->mac.max_rx_queues);
5605                 return -1;
5606         }
5607
5608         /*
5609          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5610          * disables VF packet reception if the PF MTU is > 1500.
5611          * This is done to deal with an 82599 limitation that forces
5612          * the PF and all VFs to share the same MTU.
5613          * The PF driver re-enables VF packet reception only when the VF
5614          * driver issues an IXGBE_VF_SET_LPE request.
5615          * In the meantime, the VF device cannot be used, even if the VF driver
5616          * and the guest VM network stack are ready to accept packets with a
5617          * size up to the PF MTU.
5618          * As a work-around to this PF behaviour, force the call to
5619          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5620          * VF packet reception works in all cases.
5621          */
5622         ixgbevf_rlpml_set_vf(hw,
5623                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5624
5625         /*
5626          * Assume no header split and no VLAN strip support
5627          * on any Rx queue first.
5628          */
5629         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5630         /* Setup RX queues */
5631         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5632                 rxq = dev->data->rx_queues[i];
5633
5634                 /* Allocate buffers for descriptor rings */
5635                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5636                 if (ret)
5637                         return ret;
5638
5639                 /* Setup the Base and Length of the Rx Descriptor Rings */
5640                 bus_addr = rxq->rx_ring_phys_addr;
5641
5642                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5643                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5644                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5645                                 (uint32_t)(bus_addr >> 32));
5646                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5647                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5648                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5649                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5650
5651
5652                 /* Configure the SRRCTL register */
5653                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5654
5655                 /* Set if packets are dropped when no descriptors available */
5656                 if (rxq->drop_en)
5657                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5658
5659                 /*
5660                  * Configure the RX buffer size in the BSIZEPACKET field of
5661                  * the SRRCTL register of the queue.
5662                  * The value is in 1 KB resolution. Valid values can be from
5663                  * 1 KB to 16 KB.
5664                  */
5665                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5666                         RTE_PKTMBUF_HEADROOM);
5667                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5668                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5669
5670                 /*
5671                  * VF modification to write virtual function SRRCTL register
5672                  */
5673                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5674
5675                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5676                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5677
5678                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5679                     /* Account for two VLAN tags (QinQ) when checking the buffer size */
5680                     (rxmode->max_rx_pkt_len +
5681                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5682                         if (!dev->data->scattered_rx)
5683                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5684                         dev->data->scattered_rx = 1;
5685                 }
5686
5687                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5688                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5689         }
5690
5691         /* Set RQPL (RSS queues per pool) for VF RSS according to the max Rx queue */
5692         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5693                 IXGBE_PSRTYPE_RQPL_SHIFT;
5694         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5695
5696         ixgbe_set_rx_function(dev);
5697
5698         return 0;
5699 }
5700
5701 /*
5702  * [VF] Initializes Transmit Unit.
5703  */
5704 void __attribute__((cold))
5705 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5706 {
5707         struct ixgbe_hw     *hw;
5708         struct ixgbe_tx_queue *txq;
5709         uint64_t bus_addr;
5710         uint32_t txctrl;
5711         uint16_t i;
5712
5713         PMD_INIT_FUNC_TRACE();
5714         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5715
5716         /* Setup the Base and Length of the Tx Descriptor Rings */
5717         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5718                 txq = dev->data->tx_queues[i];
5719                 bus_addr = txq->tx_ring_phys_addr;
5720                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5721                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5722                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5723                                 (uint32_t)(bus_addr >> 32));
5724                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5725                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5726                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5727                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5728                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5729
5730                 /*
5731                  * Disable Tx Head Writeback RO bit, since this hoses
5732                  * bookkeeping if things aren't delivered in order.
5733                  */
5734                 txctrl = IXGBE_READ_REG(hw,
5735                                 IXGBE_VFDCA_TXCTRL(i));
5736                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5737                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5738                                 txctrl);
5739         }
5740 }
5741
5742 /*
5743  * [VF] Start Transmit and Receive Units.
5744  */
5745 void __attribute__((cold))
5746 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5747 {
5748         struct ixgbe_hw     *hw;
5749         struct ixgbe_tx_queue *txq;
5750         struct ixgbe_rx_queue *rxq;
5751         uint32_t txdctl;
5752         uint32_t rxdctl;
5753         uint16_t i;
5754         int poll_ms;
5755
5756         PMD_INIT_FUNC_TRACE();
5757         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5758
5759         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5760                 txq = dev->data->tx_queues[i];
5761                 /* Setup Transmit Threshold Registers */
5762                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5763                 txdctl |= txq->pthresh & 0x7F;
5764                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5765                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5766                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5767         }
5768
5769         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5770
5771                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5772                 txdctl |= IXGBE_TXDCTL_ENABLE;
5773                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5774
5775                 poll_ms = 10;
5776                 /* Wait until TX Enable ready */
5777                 do {
5778                         rte_delay_ms(1);
5779                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5780                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5781                 if (!poll_ms)
5782                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5783         }
5784         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5785
5786                 rxq = dev->data->rx_queues[i];
5787
5788                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5789                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5790                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5791
5792                 /* Wait until RX Enable ready */
5793                 poll_ms = 10;
5794                 do {
5795                         rte_delay_ms(1);
5796                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5797                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5798                 if (!poll_ms)
5799                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5800                 rte_wmb();
5801                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5802
5803         }
5804 }
5805
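/*
 * Minimal usage sketch (illustrative only, not part of the driver): copy a
 * rte_flow RSS action into driver-owned storage and program it. The key and
 * queue arrays below are hypothetical placeholders.
 *
 *     static uint8_t rss_key[40];
 *     static uint16_t queues[] = { 0, 1 };
 *     struct ixgbe_rte_flow_rss_conf rss;
 *     struct rte_flow_action_rss act = {
 *             .types = ETH_RSS_IP,
 *             .key_len = sizeof(rss_key),
 *             .key = rss_key,
 *             .queue_num = RTE_DIM(queues),
 *             .queue = queues,
 *     };
 *     if (ixgbe_rss_conf_init(&rss, &act) == 0)
 *             (void)ixgbe_config_rss_filter(dev, &rss, true);
 */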
5806 int
5807 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5808                     const struct rte_flow_action_rss *in)
5809 {
5810         if (in->key_len > RTE_DIM(out->key) ||
5811             in->queue_num > RTE_DIM(out->queue))
5812                 return -EINVAL;
5813         out->conf = (struct rte_flow_action_rss){
5814                 .func = in->func,
5815                 .level = in->level,
5816                 .types = in->types,
5817                 .key_len = in->key_len,
5818                 .queue_num = in->queue_num,
5819                 .key = memcpy(out->key, in->key, in->key_len),
5820                 .queue = memcpy(out->queue, in->queue,
5821                                 sizeof(*in->queue) * in->queue_num),
5822         };
5823         return 0;
5824 }
5825
5826 int
5827 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5828                       const struct rte_flow_action_rss *with)
5829 {
5830         return (comp->func == with->func &&
5831                 comp->level == with->level &&
5832                 comp->types == with->types &&
5833                 comp->key_len == with->key_len &&
5834                 comp->queue_num == with->queue_num &&
5835                 !memcmp(comp->key, with->key, with->key_len) &&
5836                 !memcmp(comp->queue, with->queue,
5837                         sizeof(*with->queue) * with->queue_num));
5838 }
5839
5840 int
5841 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5842                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5843 {
5844         struct ixgbe_hw *hw;
5845         uint32_t reta;
5846         uint16_t i;
5847         uint16_t j;
5848         uint16_t sp_reta_size;
5849         uint32_t reta_reg;
5850         struct rte_eth_rss_conf rss_conf = {
5851                 .rss_key = conf->conf.key_len ?
5852                         (void *)(uintptr_t)conf->conf.key : NULL,
5853                 .rss_key_len = conf->conf.key_len,
5854                 .rss_hf = conf->conf.types,
5855         };
5856         struct ixgbe_filter_info *filter_info =
5857                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5858
5859         PMD_INIT_FUNC_TRACE();
5860         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5861
5862         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5863
5864         if (!add) {
5865                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5866                                           &conf->conf)) {
5867                         ixgbe_rss_disable(dev);
5868                         memset(&filter_info->rss_info, 0,
5869                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5870                         return 0;
5871                 }
5872                 return -EINVAL;
5873         }
5874
5875         if (filter_info->rss_info.conf.queue_num)
5876                 return -EINVAL;
5877         /* Fill in redirection table
5878          * The byte-swap is needed because NIC registers are in
5879          * little-endian order.
5880          */
5881         reta = 0;
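        /*
         * Each 32-bit RETA register packs four 8-bit queue indices, so four
         * entries are accumulated and the register is written on every
         * fourth iteration.
         */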
5882         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5883                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5884
5885                 if (j == conf->conf.queue_num)
5886                         j = 0;
5887                 reta = (reta << 8) | conf->conf.queue[j];
5888                 if ((i & 3) == 3)
5889                         IXGBE_WRITE_REG(hw, reta_reg,
5890                                         rte_bswap32(reta));
5891         }
5892
5893         /* Configure the RSS key and the RSS protocols used to compute
5894          * the RSS hash of input packets.
5895          */
5896         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5897                 ixgbe_rss_disable(dev);
5898                 return 0;
5899         }
5900         if (rss_conf.rss_key == NULL)
5901                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5902         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5903
5904         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5905                 return -EINVAL;
5906
5907         return 0;
5908 }
5909
5910 /* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
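/*
 * The condition-check and setup stubs return -1 so that the driver falls back
 * to the scalar Rx/Tx burst paths on PPC.
 */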
5911 #if defined(RTE_ARCH_PPC_64)
5912 int
5913 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5914 {
5915         return -1;
5916 }
5917
5918 uint16_t
5919 ixgbe_recv_pkts_vec(
5920         void __rte_unused *rx_queue,
5921         struct rte_mbuf __rte_unused **rx_pkts,
5922         uint16_t __rte_unused nb_pkts)
5923 {
5924         return 0;
5925 }
5926
5927 uint16_t
5928 ixgbe_recv_scattered_pkts_vec(
5929         void __rte_unused *rx_queue,
5930         struct rte_mbuf __rte_unused **rx_pkts,
5931         uint16_t __rte_unused nb_pkts)
5932 {
5933         return 0;
5934 }
5935
5936 int
5937 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5938 {
5939         return -1;
5940 }
5941
5942 uint16_t
5943 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5944                 struct rte_mbuf __rte_unused **tx_pkts,
5945                 uint16_t __rte_unused nb_pkts)
5946 {
5947         return 0;
5948 }
5949
5950 int
5951 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5952 {
5953         return -1;
5954 }
5955
5956 void
5957 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5958 {
5959         return;
5960 }
5961 #endif