1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit Mask to indicate what bits are required for building the TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |     \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
76
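/*
 * Illustrative sketch, not part of the driver: any offload flag that
 * falls outside IXGBE_TX_OFFLOAD_MASK is unsupported, so a Tx-prepare
 * style check only has to test the mbuf's ol_flags against
 * IXGBE_TX_OFFLOAD_NOTSUP_MASK (the real check lives in
 * ixgbe_prep_pkts() below; this helper is hypothetical).
 */
static __rte_unused int
ixgbe_example_offload_supported(const struct rte_mbuf *m)
{
        /* Zero means every requested offload is supported by this PMD. */
        return (m->ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) == 0;
}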
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 /*********************************************************************
91  *
92  *  TX functions
93  *
94  **********************************************************************/
95
96 /*
97  * Check for descriptors with their DD bit set and free mbufs.
98  * Return the total number of buffers freed.
99  */
100 static __rte_always_inline int
101 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
102 {
103         struct ixgbe_tx_entry *txep;
104         uint32_t status;
105         int i, nb_free = 0;
106         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
107
108         /* check DD bit on threshold descriptor */
109         status = txq->tx_ring[txq->tx_next_dd].wb.status;
110         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
111                 return 0;
112
113         /*
114          * first buffer to free from S/W ring is at index
115          * tx_next_dd - (tx_rs_thresh-1)
116          */
117         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
118
119         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
120                 /* free buffers one at a time */
121                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
122                 txep->mbuf = NULL;
123
124                 if (unlikely(m == NULL))
125                         continue;
126
127                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
128                     (nb_free > 0 && m->pool != free[0]->pool)) {
129                         rte_mempool_put_bulk(free[0]->pool,
130                                              (void **)free, nb_free);
131                         nb_free = 0;
132                 }
133
134                 free[nb_free++] = m;
135         }
136
137         if (nb_free > 0)
138                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
139
140         /* buffers were freed, update counters */
141         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
142         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
143         if (txq->tx_next_dd >= txq->nb_tx_desc)
144                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
145
146         return txq->tx_rs_thresh;
147 }
148
149 /* Populate 4 descriptors with data from 4 mbufs */
150 static inline void
151 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
152 {
153         uint64_t buf_dma_addr;
154         uint32_t pkt_len;
155         int i;
156
157         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
158                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
159                 pkt_len = (*pkts)->data_len;
160
161                 /* write data to descriptor */
162                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
163
164                 txdp->read.cmd_type_len =
165                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
166
167                 txdp->read.olinfo_status =
168                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
169
170                 rte_prefetch0(&(*pkts)->pool);
171         }
172 }
173
174 /* Populate 1 descriptor with data from 1 mbuf */
175 static inline void
176 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
177 {
178         uint64_t buf_dma_addr;
179         uint32_t pkt_len;
180
181         buf_dma_addr = rte_mbuf_data_iova(*pkts);
182         pkt_len = (*pkts)->data_len;
183
184         /* write data to descriptor */
185         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
186         txdp->read.cmd_type_len =
187                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
188         txdp->read.olinfo_status =
189                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
190         rte_prefetch0(&(*pkts)->pool);
191 }
192
193 /*
194  * Fill H/W descriptor ring with mbuf data.
195  * Copy mbuf pointers to the S/W ring.
196  */
197 static inline void
198 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
199                       uint16_t nb_pkts)
200 {
201         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
202         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
203         const int N_PER_LOOP = 4;
204         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
205         int mainpart, leftover;
206         int i, j;
207
208         /*
209          * Process most of the packets in chunks of N pkts.  Any
210          * leftover packets will get processed one at a time.
211          */
212         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
213         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
214         for (i = 0; i < mainpart; i += N_PER_LOOP) {
215                 /* Copy N mbuf pointers to the S/W ring */
216                 for (j = 0; j < N_PER_LOOP; ++j) {
217                         (txep + i + j)->mbuf = *(pkts + i + j);
218                 }
219                 tx4(txdp + i, pkts + i);
220         }
221
222         if (unlikely(leftover > 0)) {
223                 for (i = 0; i < leftover; ++i) {
224                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
225                         tx1(txdp + mainpart + i, pkts + mainpart + i);
226                 }
227         }
228 }
229
230 static inline uint16_t
231 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
232              uint16_t nb_pkts)
233 {
234         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
235         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
236         uint16_t n = 0;
237
238         /*
239          * Begin scanning the H/W ring for done descriptors when the
240          * number of available descriptors drops below tx_free_thresh.  For
241          * each done descriptor, free the associated buffer.
242          */
243         if (txq->nb_tx_free < txq->tx_free_thresh)
244                 ixgbe_tx_free_bufs(txq);
245
246         /* Only use descriptors that are available */
247         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
248         if (unlikely(nb_pkts == 0))
249                 return 0;
250
251         /* Use exactly nb_pkts descriptors */
252         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
253
254         /*
255          * At this point, we know there are enough descriptors in the
256          * ring to transmit all the packets.  This assumes that each
257          * mbuf contains a single segment, and that no new offloads
258          * are expected, which would require a new context descriptor.
259          */
260
261         /*
262          * See if we're going to wrap-around. If so, handle the top
263          * of the descriptor ring first, then do the bottom.  If not,
264          * the processing looks just like the "bottom" part anyway...
265          */
266         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
267                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
268                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
269
270                 /*
271                  * We know that the last descriptor in the ring will need to
272                  * have its RS bit set because tx_rs_thresh has to be
273                  * a divisor of the ring size
274                  */
275                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
276                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
277                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
278
279                 txq->tx_tail = 0;
280         }
281
282         /* Fill H/W descriptor ring with mbuf data */
283         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
284         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
285
286         /*
287          * Determine if RS bit should be set
288          * This is what we actually want:
289          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
290          * but instead of subtracting 1 and doing >=, we can just do
291          * greater than without subtracting.
292          */
293         if (txq->tx_tail > txq->tx_next_rs) {
294                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
295                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
296                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
297                                                 txq->tx_rs_thresh);
298                 if (txq->tx_next_rs >= txq->nb_tx_desc)
299                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
300         }
301
302         /*
303          * Check for wrap-around. This would only happen if we used
304          * up to the last descriptor in the ring, no more, no less.
305          */
306         if (txq->tx_tail >= txq->nb_tx_desc)
307                 txq->tx_tail = 0;
308
309         /* update tail pointer */
310         rte_wmb();
311         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
312
313         return nb_pkts;
314 }
315
316 uint16_t
317 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
318                        uint16_t nb_pkts)
319 {
320         uint16_t nb_tx;
321
322         /* Transmit the whole burst directly if it fits in TX_MAX_BURST pkts */
323         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
324                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
325
326         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
327         nb_tx = 0;
328         while (nb_pkts) {
329                 uint16_t ret, n;
330
331                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
332                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
333                 nb_tx = (uint16_t)(nb_tx + ret);
334                 nb_pkts = (uint16_t)(nb_pkts - ret);
335                 if (ret < n)
336                         break;
337         }
338
339         return nb_tx;
340 }
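/*
 * Illustrative sketch, not part of the driver: from the application's
 * side the chunking above is transparent; a caller simply hands a burst
 * to rte_eth_tx_burst() and deals with whatever was not accepted
 * (retry or free).  The helper below is hypothetical.
 */
static __rte_unused void
ixgbe_example_drain_burst(uint16_t port_id, uint16_t queue_id,
                          struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);

        /* Free the packets the PMD did not accept (an app could retry). */
        while (sent < nb_pkts)
                rte_pktmbuf_free(pkts[sent++]);
}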
341
342 static uint16_t
343 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
344                     uint16_t nb_pkts)
345 {
346         uint16_t nb_tx = 0;
347         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
348
349         while (nb_pkts) {
350                 uint16_t ret, num;
351
352                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
353                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
354                                                  num);
355                 nb_tx += ret;
356                 nb_pkts -= ret;
357                 if (ret < num)
358                         break;
359         }
360
361         return nb_tx;
362 }
363
364 static inline void
365 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
366                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
367                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
368                 __rte_unused uint64_t *mdata)
369 {
370         uint32_t type_tucmd_mlhl;
371         uint32_t mss_l4len_idx = 0;
372         uint32_t ctx_idx;
373         uint32_t vlan_macip_lens;
374         union ixgbe_tx_offload tx_offload_mask;
375         uint32_t seqnum_seed = 0;
376
377         ctx_idx = txq->ctx_curr;
378         tx_offload_mask.data[0] = 0;
379         tx_offload_mask.data[1] = 0;
380         type_tucmd_mlhl = 0;
381
382         /* Specify which HW CTX to upload. */
383         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
384
385         if (ol_flags & PKT_TX_VLAN_PKT) {
386                 tx_offload_mask.vlan_tci |= ~0;
387         }
388
389         /* check if TCP segmentation is required for this packet */
390         if (ol_flags & PKT_TX_TCP_SEG) {
391                 /* implies IP cksum in IPv4 */
392                 if (ol_flags & PKT_TX_IP_CKSUM)
393                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
394                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
395                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
396                 else
397                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
398                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
399                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
400
401                 tx_offload_mask.l2_len |= ~0;
402                 tx_offload_mask.l3_len |= ~0;
403                 tx_offload_mask.l4_len |= ~0;
404                 tx_offload_mask.tso_segsz |= ~0;
405                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
406                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
407         } else { /* no TSO, check if hardware checksum is needed */
408                 if (ol_flags & PKT_TX_IP_CKSUM) {
409                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                 }
413
414                 switch (ol_flags & PKT_TX_L4_MASK) {
415                 case PKT_TX_UDP_CKSUM:
416                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
417                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
419                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
427                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 case PKT_TX_SCTP_CKSUM:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
435                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
436                         tx_offload_mask.l2_len |= ~0;
437                         tx_offload_mask.l3_len |= ~0;
438                         break;
439                 default:
440                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
441                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
442                         break;
443                 }
444         }
445
446         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
447                 tx_offload_mask.outer_l2_len |= ~0;
448                 tx_offload_mask.outer_l3_len |= ~0;
449                 tx_offload_mask.l2_len |= ~0;
450                 seqnum_seed |= tx_offload.outer_l3_len
451                                << IXGBE_ADVTXD_OUTER_IPLEN;
452                 seqnum_seed |= tx_offload.l2_len
453                                << IXGBE_ADVTXD_TUNNEL_LEN;
454         }
455 #ifdef RTE_LIBRTE_SECURITY
456         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
457                 union ixgbe_crypto_tx_desc_md *md =
458                                 (union ixgbe_crypto_tx_desc_md *)mdata;
459                 seqnum_seed |=
460                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
461                 type_tucmd_mlhl |= md->enc ?
462                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
463                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
464                 type_tucmd_mlhl |=
465                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
466                 tx_offload_mask.sa_idx |= ~0;
467                 tx_offload_mask.sec_pad_len |= ~0;
468         }
469 #endif
470
471         txq->ctx_cache[ctx_idx].flags = ol_flags;
472         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
473                 tx_offload_mask.data[0] & tx_offload.data[0];
474         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
475                 tx_offload_mask.data[1] & tx_offload.data[1];
476         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
477
478         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
479         vlan_macip_lens = tx_offload.l3_len;
480         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
481                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
482                                     IXGBE_ADVTXD_MACLEN_SHIFT);
483         else
484                 vlan_macip_lens |= (tx_offload.l2_len <<
485                                     IXGBE_ADVTXD_MACLEN_SHIFT);
486         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
487         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
488         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
489         ctx_txd->seqnum_seed     = seqnum_seed;
490 }
491
492 /*
493  * Check which hardware context can be used. Use the existing match
494  * or create a new context descriptor.
495  */
496 static inline uint32_t
497 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
498                    union ixgbe_tx_offload tx_offload)
499 {
500         /* If it matches the currently used context */
501         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
502                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
503                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
504                      & tx_offload.data[0])) &&
505                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
506                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
507                      & tx_offload.data[1]))))
508                 return txq->ctx_curr;
509
510         /* Otherwise, check whether it matches the other cached context */
511         txq->ctx_curr ^= 1;
512         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
513                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
514                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
515                      & tx_offload.data[0])) &&
516                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
517                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
518                      & tx_offload.data[1]))))
519                 return txq->ctx_curr;
520
521         /* Mismatch: a new context descriptor needs to be built */
522         return IXGBE_CTX_NUM;
523 }
524
525 static inline uint32_t
526 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
527 {
528         uint32_t tmp = 0;
529
530         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
531                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
532         if (ol_flags & PKT_TX_IP_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
534         if (ol_flags & PKT_TX_TCP_SEG)
535                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
536         return tmp;
537 }
538
539 static inline uint32_t
540 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
541 {
542         uint32_t cmdtype = 0;
543
544         if (ol_flags & PKT_TX_VLAN_PKT)
545                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
548         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
549                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
550         if (ol_flags & PKT_TX_MACSEC)
551                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
552         return cmdtype;
553 }
554
555 /* Default RS bit threshold values */
556 #ifndef DEFAULT_TX_RS_THRESH
557 #define DEFAULT_TX_RS_THRESH   32
558 #endif
559 #ifndef DEFAULT_TX_FREE_THRESH
560 #define DEFAULT_TX_FREE_THRESH 32
561 #endif
562
563 /* Reset transmit descriptors after they have been used */
564 static inline int
565 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
566 {
567         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
568         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
569         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
570         uint16_t nb_tx_desc = txq->nb_tx_desc;
571         uint16_t desc_to_clean_to;
572         uint16_t nb_tx_to_clean;
573         uint32_t status;
574
575         /* Determine the last descriptor needing to be cleaned */
576         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
577         if (desc_to_clean_to >= nb_tx_desc)
578                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
579
580         /* Check to make sure the last descriptor to clean is done */
581         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
582         status = txr[desc_to_clean_to].wb.status;
583         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
584                 PMD_TX_FREE_LOG(DEBUG,
585                                 "TX descriptor %4u is not done "
586                                 "(port=%d queue=%d)",
587                                 desc_to_clean_to,
588                                 txq->port_id, txq->queue_id);
589                 /* Failed to clean any descriptors, better luck next time */
590                 return -(1);
591         }
592
593         /* Figure out how many descriptors will be cleaned */
594         if (last_desc_cleaned > desc_to_clean_to)
595                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
596                                                         desc_to_clean_to);
597         else
598                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
599                                                 last_desc_cleaned);
600
601         PMD_TX_FREE_LOG(DEBUG,
602                         "Cleaning %4u TX descriptors: %4u to %4u "
603                         "(port=%d queue=%d)",
604                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
605                         txq->port_id, txq->queue_id);
606
607         /*
608          * The last descriptor to clean is done, so that means all the
609          * descriptors from the last descriptor that was cleaned
610          * up to the last descriptor with the RS bit set
611          * are done. Only reset the threshold descriptor.
612          */
613         txr[desc_to_clean_to].wb.status = 0;
614
615         /* Update the txq to reflect the last descriptor that was cleaned */
616         txq->last_desc_cleaned = desc_to_clean_to;
617         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
618
619         /* No Error */
620         return 0;
621 }
622
623 uint16_t
624 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
625                 uint16_t nb_pkts)
626 {
627         struct ixgbe_tx_queue *txq;
628         struct ixgbe_tx_entry *sw_ring;
629         struct ixgbe_tx_entry *txe, *txn;
630         volatile union ixgbe_adv_tx_desc *txr;
631         volatile union ixgbe_adv_tx_desc *txd, *txp;
632         struct rte_mbuf     *tx_pkt;
633         struct rte_mbuf     *m_seg;
634         uint64_t buf_dma_addr;
635         uint32_t olinfo_status;
636         uint32_t cmd_type_len;
637         uint32_t pkt_len;
638         uint16_t slen;
639         uint64_t ol_flags;
640         uint16_t tx_id;
641         uint16_t tx_last;
642         uint16_t nb_tx;
643         uint16_t nb_used;
644         uint64_t tx_ol_req;
645         uint32_t ctx = 0;
646         uint32_t new_ctx;
647         union ixgbe_tx_offload tx_offload;
648 #ifdef RTE_LIBRTE_SECURITY
649         uint8_t use_ipsec;
650 #endif
651
652         tx_offload.data[0] = 0;
653         tx_offload.data[1] = 0;
654         txq = tx_queue;
655         sw_ring = txq->sw_ring;
656         txr     = txq->tx_ring;
657         tx_id   = txq->tx_tail;
658         txe = &sw_ring[tx_id];
659         txp = NULL;
660
661         /* Determine if the descriptor ring needs to be cleaned. */
662         if (txq->nb_tx_free < txq->tx_free_thresh)
663                 ixgbe_xmit_cleanup(txq);
664
665         rte_prefetch0(&txe->mbuf->pool);
666
667         /* TX loop */
668         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
669                 new_ctx = 0;
670                 tx_pkt = *tx_pkts++;
671                 pkt_len = tx_pkt->pkt_len;
672
673                 /*
674                  * Determine how many (if any) context descriptors
675                  * are needed for offload functionality.
676                  */
677                 ol_flags = tx_pkt->ol_flags;
678 #ifdef RTE_LIBRTE_SECURITY
679                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
680 #endif
681
682                 /* If hardware offload required */
683                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
684                 if (tx_ol_req) {
685                         tx_offload.l2_len = tx_pkt->l2_len;
686                         tx_offload.l3_len = tx_pkt->l3_len;
687                         tx_offload.l4_len = tx_pkt->l4_len;
688                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
689                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
690                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
691                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
692 #ifdef RTE_LIBRTE_SECURITY
693                         if (use_ipsec) {
694                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
695                                         (union ixgbe_crypto_tx_desc_md *)
696                                                         &tx_pkt->udata64;
697                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
698                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
699                         }
700 #endif
701
702                         /* Decide whether a new context must be built or an existing one reused. */
703                         ctx = what_advctx_update(txq, tx_ol_req,
704                                 tx_offload);
705                         /* Only allocate a context descriptor if required */
706                         new_ctx = (ctx == IXGBE_CTX_NUM);
707                         ctx = txq->ctx_curr;
708                 }
709
710                 /*
711                  * Keep track of how many descriptors are used in this loop.
712                  * This will always be the number of segments + the number of
713                  * context descriptors required to transmit the packet.
714                  */
715                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
716
717                 if (txp != NULL &&
718                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
719                         /* set RS on the previous packet in the burst */
720                         txp->read.cmd_type_len |=
721                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
722
723                 /*
724                  * The number of descriptors that must be allocated for a
725                  * packet is the number of segments of that packet, plus 1
726                  * Context Descriptor for the hardware offload, if any.
727                  * Determine the last TX descriptor to allocate in the TX ring
728                  * for the packet, starting from the current position (tx_id)
729                  * in the ring.
730                  */
731                 tx_last = (uint16_t) (tx_id + nb_used - 1);
732
733                 /* Circular ring */
734                 if (tx_last >= txq->nb_tx_desc)
735                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
736
737                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
738                            " tx_first=%u tx_last=%u",
739                            (unsigned) txq->port_id,
740                            (unsigned) txq->queue_id,
741                            (unsigned) pkt_len,
742                            (unsigned) tx_id,
743                            (unsigned) tx_last);
744
745                 /*
746                  * Make sure there are enough TX descriptors available to
747                  * transmit the entire packet.
748                  * nb_used better be less than or equal to txq->tx_rs_thresh
749                  */
750                 if (nb_used > txq->nb_tx_free) {
751                         PMD_TX_FREE_LOG(DEBUG,
752                                         "Not enough free TX descriptors "
753                                         "nb_used=%4u nb_free=%4u "
754                                         "(port=%d queue=%d)",
755                                         nb_used, txq->nb_tx_free,
756                                         txq->port_id, txq->queue_id);
757
758                         if (ixgbe_xmit_cleanup(txq) != 0) {
759                                 /* Could not clean any descriptors */
760                                 if (nb_tx == 0)
761                                         return 0;
762                                 goto end_of_tx;
763                         }
764
765                         /* nb_used better be <= txq->tx_rs_thresh */
766                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
767                                 PMD_TX_FREE_LOG(DEBUG,
768                                         "The number of descriptors needed to "
769                                         "transmit the packet exceeds the "
770                                         "RS bit threshold. This will impact "
771                                         "performance. "
772                                         "nb_used=%4u nb_free=%4u "
773                                         "tx_rs_thresh=%4u. "
774                                         "(port=%d queue=%d)",
775                                         nb_used, txq->nb_tx_free,
776                                         txq->tx_rs_thresh,
777                                         txq->port_id, txq->queue_id);
778                                 /*
779                                  * Loop here until there are enough TX
780                                  * descriptors or until the ring cannot be
781                                  * cleaned.
782                                  */
783                                 while (nb_used > txq->nb_tx_free) {
784                                         if (ixgbe_xmit_cleanup(txq) != 0) {
785                                                 /*
786                                                  * Could not clean any
787                                                  * descriptors
788                                                  */
789                                                 if (nb_tx == 0)
790                                                         return 0;
791                                                 goto end_of_tx;
792                                         }
793                                 }
794                         }
795                 }
796
797                 /*
798                  * By now there are enough free TX descriptors to transmit
799                  * the packet.
800                  */
801
802                 /*
803                  * Set common flags of all TX Data Descriptors.
804                  *
805                  * The following bits must be set in all Data Descriptors:
806                  *   - IXGBE_ADVTXD_DTYP_DATA
807                  *   - IXGBE_ADVTXD_DCMD_DEXT
808                  *
809                  * The following bits must be set in the first Data Descriptor
810                  * and are ignored in the other ones:
811                  *   - IXGBE_ADVTXD_DCMD_IFCS
812                  *   - IXGBE_ADVTXD_MAC_1588
813                  *   - IXGBE_ADVTXD_DCMD_VLE
814                  *
815                  * The following bits must only be set in the last Data
816                  * Descriptor:
817                  *   - IXGBE_TXD_CMD_EOP
818                  *
819                  * The following bits can be set in any Data Descriptor, but
820                  * are only set in the last Data Descriptor:
821                  *   - IXGBE_TXD_CMD_RS
822                  */
823                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
824                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
825
826 #ifdef RTE_LIBRTE_IEEE1588
827                 if (ol_flags & PKT_TX_IEEE1588_TMST)
828                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
829 #endif
830
831                 olinfo_status = 0;
832                 if (tx_ol_req) {
833
834                         if (ol_flags & PKT_TX_TCP_SEG) {
835                                 /* when TSO is on, the paylen in the descriptor
836                                  * is not the packet len but the TCP payload len */
837                                 pkt_len -= (tx_offload.l2_len +
838                                         tx_offload.l3_len + tx_offload.l4_len);
839                         }
840
841                         /*
842                          * Setup the TX Advanced Context Descriptor if required
843                          */
844                         if (new_ctx) {
845                                 volatile struct ixgbe_adv_tx_context_desc *
846                                     ctx_txd;
847
848                                 ctx_txd = (volatile struct
849                                     ixgbe_adv_tx_context_desc *)
850                                     &txr[tx_id];
851
852                                 txn = &sw_ring[txe->next_id];
853                                 rte_prefetch0(&txn->mbuf->pool);
854
855                                 if (txe->mbuf != NULL) {
856                                         rte_pktmbuf_free_seg(txe->mbuf);
857                                         txe->mbuf = NULL;
858                                 }
859
860                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
861                                         tx_offload, &tx_pkt->udata64);
862
863                                 txe->last_id = tx_last;
864                                 tx_id = txe->next_id;
865                                 txe = txn;
866                         }
867
868                         /*
869                          * Set up the TX Advanced Data Descriptor.
870                          * This path is taken whether a new context
871                          * descriptor was built or an existing one is reused.
872                          */
873                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
874                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
875                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
876                 }
877
878                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
879 #ifdef RTE_LIBRTE_SECURITY
880                 if (use_ipsec)
881                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
882 #endif
883
884                 m_seg = tx_pkt;
885                 do {
886                         txd = &txr[tx_id];
887                         txn = &sw_ring[txe->next_id];
888                         rte_prefetch0(&txn->mbuf->pool);
889
890                         if (txe->mbuf != NULL)
891                                 rte_pktmbuf_free_seg(txe->mbuf);
892                         txe->mbuf = m_seg;
893
894                         /*
895                          * Set up Transmit Data Descriptor.
896                          */
897                         slen = m_seg->data_len;
898                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
899                         txd->read.buffer_addr =
900                                 rte_cpu_to_le_64(buf_dma_addr);
901                         txd->read.cmd_type_len =
902                                 rte_cpu_to_le_32(cmd_type_len | slen);
903                         txd->read.olinfo_status =
904                                 rte_cpu_to_le_32(olinfo_status);
905                         txe->last_id = tx_last;
906                         tx_id = txe->next_id;
907                         txe = txn;
908                         m_seg = m_seg->next;
909                 } while (m_seg != NULL);
910
911                 /*
912                  * The last packet data descriptor needs End Of Packet (EOP)
913                  */
914                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
915                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
916                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917
918                 /* Set RS bit only on threshold packets' last descriptor */
919                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
920                         PMD_TX_FREE_LOG(DEBUG,
921                                         "Setting RS bit on TXD id="
922                                         "%4u (port=%d queue=%d)",
923                                         tx_last, txq->port_id, txq->queue_id);
924
925                         cmd_type_len |= IXGBE_TXD_CMD_RS;
926
927                         /* Update txq RS bit counters */
928                         txq->nb_tx_used = 0;
929                         txp = NULL;
930                 } else
931                         txp = txd;
932
933                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
934         }
935
936 end_of_tx:
937         /* set RS on last packet in the burst */
938         if (txp != NULL)
939                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
940
941         rte_wmb();
942
943         /*
944          * Set the Transmit Descriptor Tail (TDT)
945          */
946         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
947                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
948                    (unsigned) tx_id, (unsigned) nb_tx);
949         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
950         txq->tx_tail = tx_id;
951
952         return nb_tx;
953 }
954
955 /*********************************************************************
956  *
957  *  TX prep functions
958  *
959  **********************************************************************/
960 uint16_t
961 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
962 {
963         int i, ret;
964         uint64_t ol_flags;
965         struct rte_mbuf *m;
966         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
967
968         for (i = 0; i < nb_pkts; i++) {
969                 m = tx_pkts[i];
970                 ol_flags = m->ol_flags;
971
972                 /**
973                  * Check if packet meets requirements for number of segments
974                  *
975                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
976                  *       non-TSO
977                  */
978
979                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
980                         rte_errno = EINVAL;
981                         return i;
982                 }
983
984                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
985                         rte_errno = ENOTSUP;
986                         return i;
987                 }
988
989                 /* check the size of packet */
990                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
991                         rte_errno = EINVAL;
992                         return i;
993                 }
994
995 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
996                 ret = rte_validate_tx_offload(m);
997                 if (ret != 0) {
998                         rte_errno = -ret;
999                         return i;
1000                 }
1001 #endif
1002                 ret = rte_net_intel_cksum_prepare(m);
1003                 if (ret != 0) {
1004                         rte_errno = -ret;
1005                         return i;
1006                 }
1007         }
1008
1009         return i;
1010 }
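/*
 * Illustrative sketch, not part of the driver: applications reach
 * ixgbe_prep_pkts() through rte_eth_tx_prepare(), typically right
 * before rte_eth_tx_burst(), so packets failing the checks above are
 * rejected before they reach the hardware.  The helper below is
 * hypothetical.
 */
static __rte_unused uint16_t
ixgbe_example_prepare_and_send(uint16_t port_id, uint16_t queue_id,
                               struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        /* nb_prep packets at the head of the burst passed the checks. */
        uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
                                              pkts, nb_pkts);

        /* Transmit only the prepared prefix of the burst. */
        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}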
1011
1012 /*********************************************************************
1013  *
1014  *  RX functions
1015  *
1016  **********************************************************************/
1017
1018 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1019 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1020 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1021 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1022 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1023 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1027 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1028 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1029 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1030 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1031 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1035 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1039 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1043 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1047 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1051
1052 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1075
1076 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1099
1100 /**
1101  * Use 2 different tables for normal packets and tunnel packets
1102  * to save space.
1103  */
1104 const uint32_t
1105         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1106         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1107         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1108                 RTE_PTYPE_L3_IPV4,
1109         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1111         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1113         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1115         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4_EXT,
1117         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1119         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1121         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1123         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV6,
1125         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1127         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1129         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1131         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6_EXT,
1133         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1135         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1137         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1138                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1139         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1141                 RTE_PTYPE_INNER_L3_IPV6,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1151         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1163         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1175         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1185                 RTE_PTYPE_L2_ETHER |
1186                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1187                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1188 };
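/*
 * Illustrative sketch, not part of the driver: the RX path derives a
 * small index from the descriptor's packet-type field and uses it to
 * look up ptype_table (or ptype_table_tn for tunnelled packets).  The
 * index computation below is simplified and the helper is hypothetical.
 */
static __rte_unused uint32_t
ixgbe_example_ptype_lookup(uint32_t ptype_index)
{
        /* Mask the index so it always stays within the table. */
        return ptype_table[ptype_index & (IXGBE_PACKET_TYPE_MAX - 1)];
}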
1189
1190 const uint32_t
1191         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1192         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1193                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                 RTE_PTYPE_INNER_L2_ETHER,
1195         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1216                 RTE_PTYPE_INNER_L4_TCP,
1217         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1218                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1220                 RTE_PTYPE_INNER_L4_TCP,
1221         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1222                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1223                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1227                 RTE_PTYPE_INNER_L4_TCP,
1228         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1229                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1230                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1231                 RTE_PTYPE_INNER_L3_IPV4,
1232         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1233                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1234                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1235                 RTE_PTYPE_INNER_L4_UDP,
1236         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1237                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1238                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1239                 RTE_PTYPE_INNER_L4_UDP,
1240         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1241                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1243                 RTE_PTYPE_INNER_L4_SCTP,
1244         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1245                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1246                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1250                 RTE_PTYPE_INNER_L4_UDP,
1251         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1252                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1253                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1254                 RTE_PTYPE_INNER_L4_SCTP,
1255         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1256                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1257                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1258                 RTE_PTYPE_INNER_L3_IPV4,
1259         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1260                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1262                 RTE_PTYPE_INNER_L4_SCTP,
1263         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1266                 RTE_PTYPE_INNER_L4_SCTP,
1267         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1268                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1270                 RTE_PTYPE_INNER_L4_TCP,
1271         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1272                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1273                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1274                 RTE_PTYPE_INNER_L4_UDP,
1275
1276         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1277                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1278                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1279         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1282                 RTE_PTYPE_INNER_L3_IPV4,
1283         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1284                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1287         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1288                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                 RTE_PTYPE_INNER_L3_IPV6,
1291         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1292                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                 RTE_PTYPE_INNER_L3_IPV4,
1295         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1296                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1299         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1300                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                 RTE_PTYPE_INNER_L3_IPV4,
1303         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1304                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1307         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1308                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1309                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1310                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1311         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1312                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                 RTE_PTYPE_INNER_L3_IPV4,
1315         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1316                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1317                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1318                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1319         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1320                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1321                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1322                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1323         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1324                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1327         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1328                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1329                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1330                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1331         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1332                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1333                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1334                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1335         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1336                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1337                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1338                 RTE_PTYPE_INNER_L3_IPV4,
1339         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1340                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1341                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1342                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1343         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1344                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1345                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1346                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1347         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1348                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1349                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1350                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1351         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1352                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1353                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1354                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1355         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1356                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1357                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1358                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1359         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1360                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1361                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1362                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1363         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1364                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1365                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1366                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1367 };
1368
1369 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1370 static inline uint32_t
1371 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1372 {
1373
1374         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1375                 return RTE_PTYPE_UNKNOWN;
1376
1377         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1378
1379         /* For tunnel packet */
1380         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1381                 /* Clear the tunnel bit so the smaller tunnel ptype table can be indexed. */
1382                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1383                 return ptype_table_tn[pkt_info];
1384         }
1385
1386         /**
1387          * For x550, the tunnel type bit is 0 when the packet
1388          * is not tunnelled, so the 82599 mask can be
1389          * reused here.
1390          */
1391         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1392
1393         return ptype_table[pkt_info];
1394 }
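
/*
 * Illustrative sketch (not part of the original file): the receive paths
 * below extract the lower dword of the write-back descriptor and translate
 * it through this helper, e.g.
 *
 *     uint32_t pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
 *     uint32_t ptype = ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
 *                                                     rxq->pkt_type_mask);
 *
 * rxq->pkt_type_mask bounds the table index; the tunnel bit in pkt_info
 * selects between ptype_table and ptype_table_tn, as implemented above.
 */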
1395
1396 static inline uint64_t
1397 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1398 {
1399         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1400                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1401                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1402                 PKT_RX_RSS_HASH, 0, 0, 0,
1403                 0, 0, 0,  PKT_RX_FDIR,
1404         };
1405 #ifdef RTE_LIBRTE_IEEE1588
1406         static uint64_t ip_pkt_etqf_map[8] = {
1407                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1408                 0, 0, 0, 0,
1409         };
1410
1411         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1412                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1413                                 ip_rss_types_map[pkt_info & 0XF];
1414         else
1415                 return ip_rss_types_map[pkt_info & 0XF];
1416 #else
1417         return ip_rss_types_map[pkt_info & 0XF];
1418 #endif
1419 }
1420
1421 static inline uint64_t
1422 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1423 {
1424         uint64_t pkt_flags;
1425
1426         /*
1427          * Check only whether a VLAN is present.
1428          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1429          * that can be determined from the rte_eth_rxmode.offloads flags.
1430          */
1431         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1432
1433 #ifdef RTE_LIBRTE_IEEE1588
1434         if (rx_status & IXGBE_RXD_STAT_TMST)
1435                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1436 #endif
1437         return pkt_flags;
1438 }
1439
1440 static inline uint64_t
1441 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1442 {
1443         uint64_t pkt_flags;
1444
1445         /*
1446          * Bit 31: IPE, IPv4 checksum error
1447          * Bit 30: L4I, L4 integrity error
1448          */
1449         static uint64_t error_to_pkt_flags_map[4] = {
1450                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1451                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1452                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1453                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1454         };
1455         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1456                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1457
1458         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1459             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1460                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1461         }
1462
1463 #ifdef RTE_LIBRTE_SECURITY
1464         if (rx_status & IXGBE_RXD_STAT_SECP) {
1465                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1466                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1467                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1468         }
1469 #endif
1470
1471         return pkt_flags;
1472 }
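
/*
 * Worked example (illustrative, assuming IXGBE_RXDADV_ERR_CKSUM_BIT == 30
 * and IXGBE_RXDADV_ERR_CKSUM_MSK == 3): a status word with IPE (bit 31)
 * set and L4I (bit 30) clear yields index 2, so the map above returns
 * PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD.
 */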
1473
1474 /*
1475  * LOOK_AHEAD defines how many desc statuses to check beyond the
1476  * current descriptor.
1477  * It must be a pound define for optimal performance.
1478  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1479  * function only works with LOOK_AHEAD=8.
1480  */
1481 #define LOOK_AHEAD 8
1482 #if (LOOK_AHEAD != 8)
1483 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1484 #endif
1485 static inline int
1486 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1487 {
1488         volatile union ixgbe_adv_rx_desc *rxdp;
1489         struct ixgbe_rx_entry *rxep;
1490         struct rte_mbuf *mb;
1491         uint16_t pkt_len;
1492         uint64_t pkt_flags;
1493         int nb_dd;
1494         uint32_t s[LOOK_AHEAD];
1495         uint32_t pkt_info[LOOK_AHEAD];
1496         int i, j, nb_rx = 0;
1497         uint32_t status;
1498         uint64_t vlan_flags = rxq->vlan_flags;
1499
1500         /* get references to current descriptor and S/W ring entry */
1501         rxdp = &rxq->rx_ring[rxq->rx_tail];
1502         rxep = &rxq->sw_ring[rxq->rx_tail];
1503
1504         status = rxdp->wb.upper.status_error;
1505         /* check to make sure there is at least 1 packet to receive */
1506         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1507                 return 0;
1508
1509         /*
1510          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1511          * reference packets that are ready to be received.
1512          */
1513         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1514              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1515                 /* Read desc statuses backwards to avoid race condition */
1516                 for (j = 0; j < LOOK_AHEAD; j++)
1517                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1518
1519                 rte_smp_rmb();
1520
1521                 /* Count how many contiguous descriptors have the DD bit set */
1522                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1523                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1524                         ;
1525
1526                 for (j = 0; j < nb_dd; j++)
1527                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1528                                                        lo_dword.data);
1529
1530                 nb_rx += nb_dd;
1531
1532                 /* Translate descriptor info to mbuf format */
1533                 for (j = 0; j < nb_dd; ++j) {
1534                         mb = rxep[j].mbuf;
1535                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1536                                   rxq->crc_len;
1537                         mb->data_len = pkt_len;
1538                         mb->pkt_len = pkt_len;
1539                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1540
1541                         /* convert descriptor fields to rte mbuf flags */
1542                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1543                                 vlan_flags);
1544                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1545                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1546                                         ((uint16_t)pkt_info[j]);
1547                         mb->ol_flags = pkt_flags;
1548                         mb->packet_type =
1549                                 ixgbe_rxd_pkt_info_to_pkt_type
1550                                         (pkt_info[j], rxq->pkt_type_mask);
1551
1552                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1553                                 mb->hash.rss = rte_le_to_cpu_32(
1554                                     rxdp[j].wb.lower.hi_dword.rss);
1555                         else if (pkt_flags & PKT_RX_FDIR) {
1556                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1557                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1558                                     IXGBE_ATR_HASH_MASK;
1559                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1560                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1561                         }
1562                 }
1563
1564                 /* Move mbuf pointers from the S/W ring to the stage */
1565                 for (j = 0; j < LOOK_AHEAD; ++j) {
1566                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1567                 }
1568
1569                 /* stop if not all LOOK_AHEAD descriptors in this group were done */
1570                 if (nb_dd != LOOK_AHEAD)
1571                         break;
1572         }
1573
1574         /* clear software ring entries so we can cleanup correctly */
1575         for (i = 0; i < nb_rx; ++i) {
1576                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1577         }
1578
1579
1580         return nb_rx;
1581 }
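
/*
 * Note (illustrative, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32): the scan
 * loop above examines at most 32 / LOOK_AHEAD = 4 groups of 8 descriptors
 * per call and stops early at the first group in which not every DD bit
 * is set.
 */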
1582
1583 static inline int
1584 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1585 {
1586         volatile union ixgbe_adv_rx_desc *rxdp;
1587         struct ixgbe_rx_entry *rxep;
1588         struct rte_mbuf *mb;
1589         uint16_t alloc_idx;
1590         __le64 dma_addr;
1591         int diag, i;
1592
1593         /* allocate buffers in bulk directly into the S/W ring */
1594         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1595         rxep = &rxq->sw_ring[alloc_idx];
1596         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1597                                     rxq->rx_free_thresh);
1598         if (unlikely(diag != 0))
1599                 return -ENOMEM;
1600
1601         rxdp = &rxq->rx_ring[alloc_idx];
1602         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1603                 /* populate the static rte mbuf fields */
1604                 mb = rxep[i].mbuf;
1605                 if (reset_mbuf) {
1606                         mb->port = rxq->port_id;
1607                 }
1608
1609                 rte_mbuf_refcnt_set(mb, 1);
1610                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1611
1612                 /* populate the descriptors */
1613                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1614                 rxdp[i].read.hdr_addr = 0;
1615                 rxdp[i].read.pkt_addr = dma_addr;
1616         }
1617
1618         /* update state of internal queue structure */
1619         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1620         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1621                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1622
1623         /* no errors */
1624         return 0;
1625 }
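
/*
 * Worked example (illustrative): with rx_free_thresh = 32 and
 * rx_free_trigger currently 31, alloc_idx above is 0, descriptors 0..31
 * are refilled by a single rte_mempool_get_bulk() call, and
 * rx_free_trigger advances to 63 (wrapping back to rx_free_thresh - 1
 * once it reaches the end of the ring).
 */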
1626
1627 static inline uint16_t
1628 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1629                          uint16_t nb_pkts)
1630 {
1631         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1632         int i;
1633
1634         /* how many packets are ready to return? */
1635         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1636
1637         /* copy mbuf pointers to the application's packet list */
1638         for (i = 0; i < nb_pkts; ++i)
1639                 rx_pkts[i] = stage[i];
1640
1641         /* update internal queue state */
1642         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1643         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1644
1645         return nb_pkts;
1646 }
1647
1648 static inline uint16_t
1649 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1650              uint16_t nb_pkts)
1651 {
1652         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1653         uint16_t nb_rx = 0;
1654
1655         /* Any previously recv'd pkts will be returned from the Rx stage */
1656         if (rxq->rx_nb_avail)
1657                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1658
1659         /* Scan the H/W ring for packets to receive */
1660         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1661
1662         /* update internal queue state */
1663         rxq->rx_next_avail = 0;
1664         rxq->rx_nb_avail = nb_rx;
1665         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1666
1667         /* if required, allocate new buffers to replenish descriptors */
1668         if (rxq->rx_tail > rxq->rx_free_trigger) {
1669                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1670
1671                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1672                         int i, j;
1673
1674                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1675                                    "queue_id=%u", (unsigned) rxq->port_id,
1676                                    (unsigned) rxq->queue_id);
1677
1678                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1679                                 rxq->rx_free_thresh;
1680
1681                         /*
1682                          * Need to rewind any previous receives if we cannot
1683                          * allocate new buffers to replenish the old ones.
1684                          */
1685                         rxq->rx_nb_avail = 0;
1686                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1687                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1688                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1689
1690                         return 0;
1691                 }
1692
1693                 /* update tail pointer */
1694                 rte_wmb();
1695                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1696                                             cur_free_trigger);
1697         }
1698
1699         if (rxq->rx_tail >= rxq->nb_rx_desc)
1700                 rxq->rx_tail = 0;
1701
1702         /* received any packets this loop? */
1703         if (rxq->rx_nb_avail)
1704                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1705
1706         return 0;
1707 }
1708
1709 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1710 uint16_t
1711 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1712                            uint16_t nb_pkts)
1713 {
1714         uint16_t nb_rx;
1715
1716         if (unlikely(nb_pkts == 0))
1717                 return 0;
1718
1719         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1720                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1721
1722         /* request is relatively large, chunk it up */
1723         nb_rx = 0;
1724         while (nb_pkts) {
1725                 uint16_t ret, n;
1726
1727                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1728                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1729                 nb_rx = (uint16_t)(nb_rx + ret);
1730                 nb_pkts = (uint16_t)(nb_pkts - ret);
1731                 if (ret < n)
1732                         break;
1733         }
1734
1735         return nb_rx;
1736 }
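
/*
 * Illustrative usage (hypothetical application code, not part of this
 * file): once the PMD selects this burst function it is reached through
 * the generic ethdev API, e.g.
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 * Requests larger than RTE_PMD_IXGBE_RX_MAX_BURST are simply split into
 * smaller chunks by the loop above.
 */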
1737
1738 uint16_t
1739 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1740                 uint16_t nb_pkts)
1741 {
1742         struct ixgbe_rx_queue *rxq;
1743         volatile union ixgbe_adv_rx_desc *rx_ring;
1744         volatile union ixgbe_adv_rx_desc *rxdp;
1745         struct ixgbe_rx_entry *sw_ring;
1746         struct ixgbe_rx_entry *rxe;
1747         struct rte_mbuf *rxm;
1748         struct rte_mbuf *nmb;
1749         union ixgbe_adv_rx_desc rxd;
1750         uint64_t dma_addr;
1751         uint32_t staterr;
1752         uint32_t pkt_info;
1753         uint16_t pkt_len;
1754         uint16_t rx_id;
1755         uint16_t nb_rx;
1756         uint16_t nb_hold;
1757         uint64_t pkt_flags;
1758         uint64_t vlan_flags;
1759
1760         nb_rx = 0;
1761         nb_hold = 0;
1762         rxq = rx_queue;
1763         rx_id = rxq->rx_tail;
1764         rx_ring = rxq->rx_ring;
1765         sw_ring = rxq->sw_ring;
1766         vlan_flags = rxq->vlan_flags;
1767         while (nb_rx < nb_pkts) {
1768                 /*
1769                  * The order of operations here is important as the DD status
1770                  * bit must not be read after any other descriptor fields.
1771                  * rx_ring and rxdp are pointing to volatile data so the order
1772                  * of accesses cannot be reordered by the compiler. If they were
1773                  * not volatile, they could be reordered which could lead to
1774                  * using invalid descriptor fields when read from rxd.
1775                  */
1776                 rxdp = &rx_ring[rx_id];
1777                 staterr = rxdp->wb.upper.status_error;
1778                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1779                         break;
1780                 rxd = *rxdp;
1781
1782                 /*
1783                  * End of packet.
1784                  *
1785                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1786                  * is likely to be invalid and to be dropped by the various
1787                  * validation checks performed by the network stack.
1788                  *
1789                  * Allocate a new mbuf to replenish the RX ring descriptor.
1790                  * If the allocation fails:
1791                  *    - arrange for that RX descriptor to be the first one
1792                  *      being parsed the next time the receive function is
1793                  *      invoked [on the same queue].
1794                  *
1795                  *    - Stop parsing the RX ring and return immediately.
1796                  *
1797                  * This policy does not drop the packet received in the RX
1798                  * descriptor for which the allocation of a new mbuf failed.
1799                  * Thus, it allows that packet to be retrieved later, once
1800                  * mbufs have been freed in the meantime.
1801                  * As a side effect, holding RX descriptors instead of
1802                  * systematically giving them back to the NIC may lead to
1803                  * RX ring exhaustion situations.
1804                  * However, the NIC can gracefully prevent such situations
1805                  * from happening by sending specific "back-pressure" flow
1806                  * control frames to its peer(s).
1807                  */
1808                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1809                            "ext_err_stat=0x%08x pkt_len=%u",
1810                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1811                            (unsigned) rx_id, (unsigned) staterr,
1812                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1813
1814                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1815                 if (nmb == NULL) {
1816                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1817                                    "queue_id=%u", (unsigned) rxq->port_id,
1818                                    (unsigned) rxq->queue_id);
1819                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1820                         break;
1821                 }
1822
1823                 nb_hold++;
1824                 rxe = &sw_ring[rx_id];
1825                 rx_id++;
1826                 if (rx_id == rxq->nb_rx_desc)
1827                         rx_id = 0;
1828
1829                 /* Prefetch next mbuf while processing current one. */
1830                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1831
1832                 /*
1833                  * When next RX descriptor is on a cache-line boundary,
1834                  * prefetch the next 4 RX descriptors and the next 8 pointers
1835                  * to mbufs.
1836                  */
1837                 if ((rx_id & 0x3) == 0) {
1838                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1839                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1840                 }
1841
1842                 rxm = rxe->mbuf;
1843                 rxe->mbuf = nmb;
1844                 dma_addr =
1845                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1846                 rxdp->read.hdr_addr = 0;
1847                 rxdp->read.pkt_addr = dma_addr;
1848
1849                 /*
1850                  * Initialize the returned mbuf.
1851                  * 1) setup generic mbuf fields:
1852                  *    - number of segments,
1853                  *    - next segment,
1854                  *    - packet length,
1855                  *    - RX port identifier.
1856                  * 2) integrate hardware offload data, if any:
1857                  *    - RSS flag & hash,
1858                  *    - IP checksum flag,
1859                  *    - VLAN TCI, if any,
1860                  *    - error flags.
1861                  */
1862                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1863                                       rxq->crc_len);
1864                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1865                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1866                 rxm->nb_segs = 1;
1867                 rxm->next = NULL;
1868                 rxm->pkt_len = pkt_len;
1869                 rxm->data_len = pkt_len;
1870                 rxm->port = rxq->port_id;
1871
1872                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1873                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1874                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1875
1876                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1877                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1878                 pkt_flags = pkt_flags |
1879                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1880                 rxm->ol_flags = pkt_flags;
1881                 rxm->packet_type =
1882                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1883                                                        rxq->pkt_type_mask);
1884
1885                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1886                         rxm->hash.rss = rte_le_to_cpu_32(
1887                                                 rxd.wb.lower.hi_dword.rss);
1888                 else if (pkt_flags & PKT_RX_FDIR) {
1889                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1890                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1891                                         IXGBE_ATR_HASH_MASK;
1892                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1893                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1894                 }
1895                 /*
1896                  * Store the mbuf address into the next entry of the array
1897                  * of returned packets.
1898                  */
1899                 rx_pkts[nb_rx++] = rxm;
1900         }
1901         rxq->rx_tail = rx_id;
1902
1903         /*
1904          * If the number of free RX descriptors is greater than the RX free
1905          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1906          * register.
1907          * Update the RDT with the value of the last processed RX descriptor
1908          * minus 1, to guarantee that the RDT register is never equal to the
1909          * RDH register, which creates a "full" ring situation from the
1910          * hardware point of view...
1911          */
1912         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1913         if (nb_hold > rxq->rx_free_thresh) {
1914                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1915                            "nb_hold=%u nb_rx=%u",
1916                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1917                            (unsigned) rx_id, (unsigned) nb_hold,
1918                            (unsigned) nb_rx);
1919                 rx_id = (uint16_t) ((rx_id == 0) ?
1920                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1921                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1922                 nb_hold = 0;
1923         }
1924         rxq->nb_rx_hold = nb_hold;
1925         return nb_rx;
1926 }
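
/*
 * Worked example (illustrative): with rx_free_thresh = 32 the tail
 * register is only written once more than 32 descriptors have been
 * consumed; if rx_id has just wrapped to 0, RDT is written as
 * nb_rx_desc - 1 so that it can never equal RDH, per the comment above.
 */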
1927
1928 /**
1929  * Detect an RSC descriptor.
1930  */
1931 static inline uint32_t
1932 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1933 {
1934         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1935                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1936 }
1937
1938 /**
1939  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1940  *
1941  * Fill the following info in the HEAD buffer of the Rx cluster:
1942  *    - RX port identifier
1943  *    - hardware offload data, if any:
1944  *      - RSS flag & hash
1945  *      - IP checksum flag
1946  *      - VLAN TCI, if any
1947  *      - error flags
1948  * @head HEAD of the packet cluster
1949  * @desc HW descriptor to get data from
1950  * @rxq Pointer to the Rx queue
1951  */
1952 static inline void
1953 ixgbe_fill_cluster_head_buf(
1954         struct rte_mbuf *head,
1955         union ixgbe_adv_rx_desc *desc,
1956         struct ixgbe_rx_queue *rxq,
1957         uint32_t staterr)
1958 {
1959         uint32_t pkt_info;
1960         uint64_t pkt_flags;
1961
1962         head->port = rxq->port_id;
1963
1964         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1965          * set in the pkt_flags field.
1966          */
1967         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1968         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1969         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1970         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1971         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1972         head->ol_flags = pkt_flags;
1973         head->packet_type =
1974                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1975
1976         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1977                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1978         else if (pkt_flags & PKT_RX_FDIR) {
1979                 head->hash.fdir.hash =
1980                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1981                                                           & IXGBE_ATR_HASH_MASK;
1982                 head->hash.fdir.id =
1983                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1984         }
1985 }
1986
1987 /**
1988  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1989  *
1990  * @rx_queue Rx queue handle
1991  * @rx_pkts table of received packets
1992  * @nb_pkts size of rx_pkts table
1993  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1994  *
1995  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1996  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1997  *
1998  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1999  * 1) When non-EOP RSC completion arrives:
2000  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2001  *       segment's data length.
2002  *    b) Set the "next" pointer of the current segment to point to the segment
2003  *       at the NEXTP index.
2004  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2005  *       in the sw_rsc_ring.
2006  * 2) When EOP arrives we just update the cluster's total length and offload
2007  *    flags and deliver the cluster up to the upper layers. In our case - put it
2008  *    in the rx_pkts table.
2009  *
2010  * Returns the number of received packets/clusters (according to the "bulk
2011  * receive" interface).
2012  */
2013 static inline uint16_t
2014 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2015                     bool bulk_alloc)
2016 {
2017         struct ixgbe_rx_queue *rxq = rx_queue;
2018         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2019         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2020         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2021         uint16_t rx_id = rxq->rx_tail;
2022         uint16_t nb_rx = 0;
2023         uint16_t nb_hold = rxq->nb_rx_hold;
2024         uint16_t prev_id = rxq->rx_tail;
2025
2026         while (nb_rx < nb_pkts) {
2027                 bool eop;
2028                 struct ixgbe_rx_entry *rxe;
2029                 struct ixgbe_scattered_rx_entry *sc_entry;
2030                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2031                 struct ixgbe_rx_entry *next_rxe = NULL;
2032                 struct rte_mbuf *first_seg;
2033                 struct rte_mbuf *rxm;
2034                 struct rte_mbuf *nmb = NULL;
2035                 union ixgbe_adv_rx_desc rxd;
2036                 uint16_t data_len;
2037                 uint16_t next_id;
2038                 volatile union ixgbe_adv_rx_desc *rxdp;
2039                 uint32_t staterr;
2040
2041 next_desc:
2042                 /*
2043                  * The code in this whole file uses the volatile pointer to
2044                  * ensure the read ordering of the status and the rest of the
2045                  * descriptor fields (on the compiler level only!!!). This is so
2046                  * UGLY - why not just use the compiler barrier instead? DPDK
2047                  * even has the rte_compiler_barrier() for that.
2048                  *
2049                  * But most importantly this is just wrong because this doesn't
2050                  * ensure memory ordering in a general case at all. For
2051                  * instance, DPDK is supposed to work on Power CPUs where
2052                  * compiler barrier may just not be enough!
2053                  *
2054                  * I tried to write only this function properly to have a
2055                  * starting point (as a part of an LRO/RSC series) but the
2056                  * compiler cursed at me when I tried to cast away the
2057                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2058                  * keeping it the way it is for now.
2059                  *
2060                  * The code in this file is broken in so many other places and
2061                  * will just not work on a big endian CPU anyway, so the
2062                  * lines below will have to be revisited together with the rest
2063                  * of the ixgbe PMD.
2064                  *
2065                  * TODO:
2066                  *    - Get rid of "volatile" and let the compiler do its job.
2067                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2068                  *      memory ordering below.
2069                  */
2070                 rxdp = &rx_ring[rx_id];
2071                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2072
2073                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2074                         break;
2075
2076                 rxd = *rxdp;
2077
2078                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2079                                   "staterr=0x%x data_len=%u",
2080                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2081                            rte_le_to_cpu_16(rxd.wb.upper.length));
2082
2083                 if (!bulk_alloc) {
2084                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2085                         if (nmb == NULL) {
2086                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2087                                                   "port_id=%u queue_id=%u",
2088                                            rxq->port_id, rxq->queue_id);
2089
2090                                 rte_eth_devices[rxq->port_id].data->
2091                                                         rx_mbuf_alloc_failed++;
2092                                 break;
2093                         }
2094                 } else if (nb_hold > rxq->rx_free_thresh) {
2095                         uint16_t next_rdt = rxq->rx_free_trigger;
2096
2097                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2098                                 rte_wmb();
2099                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2100                                                             next_rdt);
2101                                 nb_hold -= rxq->rx_free_thresh;
2102                         } else {
2103                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2104                                                   "port_id=%u queue_id=%u",
2105                                            rxq->port_id, rxq->queue_id);
2106
2107                                 rte_eth_devices[rxq->port_id].data->
2108                                                         rx_mbuf_alloc_failed++;
2109                                 break;
2110                         }
2111                 }
2112
2113                 nb_hold++;
2114                 rxe = &sw_ring[rx_id];
2115                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2116
2117                 next_id = rx_id + 1;
2118                 if (next_id == rxq->nb_rx_desc)
2119                         next_id = 0;
2120
2121                 /* Prefetch next mbuf while processing current one. */
2122                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2123
2124                 /*
2125                  * When next RX descriptor is on a cache-line boundary,
2126                  * prefetch the next 4 RX descriptors and the next 4 pointers
2127                  * to mbufs.
2128                  */
2129                 if ((next_id & 0x3) == 0) {
2130                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2131                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2132                 }
2133
2134                 rxm = rxe->mbuf;
2135
2136                 if (!bulk_alloc) {
2137                         __le64 dma =
2138                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2139                         /*
2140                          * Update RX descriptor with the physical address of the
2141                          * new data buffer of the newly allocated mbuf.
2142                          */
2143                         rxe->mbuf = nmb;
2144
2145                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2146                         rxdp->read.hdr_addr = 0;
2147                         rxdp->read.pkt_addr = dma;
2148                 } else
2149                         rxe->mbuf = NULL;
2150
2151                 /*
2152                  * Set data length & data buffer address of mbuf.
2153                  */
2154                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2155                 rxm->data_len = data_len;
2156
2157                 if (!eop) {
2158                         uint16_t nextp_id;
2159                         /*
2160                          * Get next descriptor index:
2161                          *  - For RSC it's in the NEXTP field.
2162                          *  - For a scattered packet - it's just a following
2163                          *    descriptor.
2164                          */
2165                         if (ixgbe_rsc_count(&rxd))
2166                                 nextp_id =
2167                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2168                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2169                         else
2170                                 nextp_id = next_id;
2171
2172                         next_sc_entry = &sw_sc_ring[nextp_id];
2173                         next_rxe = &sw_ring[nextp_id];
2174                         rte_ixgbe_prefetch(next_rxe);
2175                 }
2176
2177                 sc_entry = &sw_sc_ring[rx_id];
2178                 first_seg = sc_entry->fbuf;
2179                 sc_entry->fbuf = NULL;
2180
2181                 /*
2182                  * If this is the first buffer of the received packet,
2183                  * set the pointer to the first mbuf of the packet and
2184                  * initialize its context.
2185                  * Otherwise, update the total length and the number of segments
2186                  * of the current scattered packet, and update the pointer to
2187                  * the last mbuf of the current packet.
2188                  */
2189                 if (first_seg == NULL) {
2190                         first_seg = rxm;
2191                         first_seg->pkt_len = data_len;
2192                         first_seg->nb_segs = 1;
2193                 } else {
2194                         first_seg->pkt_len += data_len;
2195                         first_seg->nb_segs++;
2196                 }
2197
2198                 prev_id = rx_id;
2199                 rx_id = next_id;
2200
2201                 /*
2202                  * If this is not the last buffer of the received packet, update
2203                  * the pointer to the first mbuf at the NEXTP entry in the
2204                  * sw_sc_ring and continue to parse the RX ring.
2205                  */
2206                 if (!eop && next_rxe) {
2207                         rxm->next = next_rxe->mbuf;
2208                         next_sc_entry->fbuf = first_seg;
2209                         goto next_desc;
2210                 }
2211
2212                 /* Initialize the first mbuf of the returned packet */
2213                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2214
2215                 /*
2216                  * Deal with the case when HW CRC stripping is disabled.
2217                  * That cannot happen when LRO is enabled, but it still can
2218                  * happen in scattered RX mode.
2219                  */
2220                 first_seg->pkt_len -= rxq->crc_len;
2221                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2222                         struct rte_mbuf *lp;
2223
2224                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2225                                 ;
2226
2227                         first_seg->nb_segs--;
2228                         lp->data_len -= rxq->crc_len - rxm->data_len;
2229                         lp->next = NULL;
2230                         rte_pktmbuf_free_seg(rxm);
2231                 } else
2232                         rxm->data_len -= rxq->crc_len;
2233
2234                 /* Prefetch data of first segment, if configured to do so. */
2235                 rte_packet_prefetch((char *)first_seg->buf_addr +
2236                         first_seg->data_off);
2237
2238                 /*
2239                  * Store the mbuf address into the next entry of the array
2240                  * of returned packets.
2241                  */
2242                 rx_pkts[nb_rx++] = first_seg;
2243         }
2244
2245         /*
2246          * Record index of the next RX descriptor to probe.
2247          */
2248         rxq->rx_tail = rx_id;
2249
2250         /*
2251          * If the number of free RX descriptors is greater than the RX free
2252          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2253          * register.
2254          * Update the RDT with the value of the last processed RX descriptor
2255          * minus 1, to guarantee that the RDT register is never equal to the
2256          * RDH register, which creates a "full" ring situation from the
2257          * hardware point of view...
2258          */
2259         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2260                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2261                            "nb_hold=%u nb_rx=%u",
2262                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2263
2264                 rte_wmb();
2265                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2266                 nb_hold = 0;
2267         }
2268
2269         rxq->nb_rx_hold = nb_hold;
2270         return nb_rx;
2271 }
2272
2273 uint16_t
2274 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2275                                  uint16_t nb_pkts)
2276 {
2277         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2278 }
2279
2280 uint16_t
2281 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2282                                uint16_t nb_pkts)
2283 {
2284         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2285 }
2286
2287 /*********************************************************************
2288  *
2289  *  Queue management functions
2290  *
2291  **********************************************************************/
2292
2293 static void __attribute__((cold))
2294 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2295 {
2296         unsigned i;
2297
2298         if (txq->sw_ring != NULL) {
2299                 for (i = 0; i < txq->nb_tx_desc; i++) {
2300                         if (txq->sw_ring[i].mbuf != NULL) {
2301                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2302                                 txq->sw_ring[i].mbuf = NULL;
2303                         }
2304                 }
2305         }
2306 }
2307
2308 static int
2309 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2310 {
2311         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2312         uint16_t i, tx_last, tx_id;
2313         uint16_t nb_tx_free_last;
2314         uint16_t nb_tx_to_clean;
2315         uint32_t pkt_cnt;
2316
2317         /* Start freeing mbufs from the entry that follows tx_tail */
2318         tx_last = txq->tx_tail;
2319         tx_id  = swr_ring[tx_last].next_id;
2320
2321         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2322                 return 0;
2323
2324         nb_tx_to_clean = txq->nb_tx_free;
2325         nb_tx_free_last = txq->nb_tx_free;
2326         if (!free_cnt)
2327                 free_cnt = txq->nb_tx_desc;
2328
2329         /* Loop through swr_ring to count the number of
2330          * freeable mbufs and packets.
2331          */
2332         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2333                 for (i = 0; i < nb_tx_to_clean &&
2334                         pkt_cnt < free_cnt &&
2335                         tx_id != tx_last; i++) {
2336                         if (swr_ring[tx_id].mbuf != NULL) {
2337                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2338                                 swr_ring[tx_id].mbuf = NULL;
2339
2340                                 /*
2341                                  * last segment in the packet,
2342                                  * increment packet count
2343                                  */
2344                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2345                         }
2346
2347                         tx_id = swr_ring[tx_id].next_id;
2348                 }
2349
2350                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2351                         txq->nb_tx_free || tx_id == tx_last)
2352                         break;
2353
2354                 if (pkt_cnt < free_cnt) {
2355                         if (ixgbe_xmit_cleanup(txq))
2356                                 break;
2357
2358                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2359                         nb_tx_free_last = txq->nb_tx_free;
2360                 }
2361         }
2362
2363         return (int)pkt_cnt;
2364 }
2365
2366 static int
2367 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2368                         uint32_t free_cnt)
2369 {
2370         int i, n, cnt;
2371
2372         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2373                 free_cnt = txq->nb_tx_desc;
2374
2375         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2376
2377         for (i = 0; i < cnt; i += n) {
2378                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2379                         break;
2380
2381                 n = ixgbe_tx_free_bufs(txq);
2382
2383                 if (n == 0)
2384                         break;
2385         }
2386
2387         return i;
2388 }
2389
2390 static int
2391 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2392                         uint32_t free_cnt __rte_unused)
2393 {
2394         return -ENOTSUP;
2395 }
2396
2397 int
2398 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2399 {
2400         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2401         if (txq->offloads == 0 &&
2402 #ifdef RTE_LIBRTE_SECURITY
2403                         !(txq->using_ipsec) &&
2404 #endif
2405                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2406                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2407                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2408                                         txq->sw_ring_v != NULL)) {
2409                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2410                 } else {
2411                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2412                 }
2413         }
2414
2415         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2416 }
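
/*
 * Illustrative usage (hypothetical application code): this callback is
 * normally reached through the generic ethdev API, e.g.
 *
 *     int freed = rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *
 * A free_cnt of 0 is treated above as "clean up to a full ring's worth of
 * descriptors".
 */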
2417
2418 static void __attribute__((cold))
2419 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2420 {
2421         if (txq != NULL &&
2422             txq->sw_ring != NULL)
2423                 rte_free(txq->sw_ring);
2424 }
2425
2426 static void __attribute__((cold))
2427 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2428 {
2429         if (txq != NULL && txq->ops != NULL) {
2430                 txq->ops->release_mbufs(txq);
2431                 txq->ops->free_swring(txq);
2432                 rte_free(txq);
2433         }
2434 }
2435
2436 void __attribute__((cold))
2437 ixgbe_dev_tx_queue_release(void *txq)
2438 {
2439         ixgbe_tx_queue_release(txq);
2440 }
2441
2442 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2443 static void __attribute__((cold))
2444 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2445 {
2446         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2447         struct ixgbe_tx_entry *txe = txq->sw_ring;
2448         uint16_t prev, i;
2449
2450         /* Zero out HW ring memory */
2451         for (i = 0; i < txq->nb_tx_desc; i++) {
2452                 txq->tx_ring[i] = zeroed_desc;
2453         }
2454
2455         /* Initialize SW ring entries */
2456         prev = (uint16_t) (txq->nb_tx_desc - 1);
2457         for (i = 0; i < txq->nb_tx_desc; i++) {
2458                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2459
2460                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2461                 txe[i].mbuf = NULL;
2462                 txe[i].last_id = i;
2463                 txe[prev].next_id = i;
2464                 prev = i;
2465         }
2466
2467         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2468         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2469
2470         txq->tx_tail = 0;
2471         txq->nb_tx_used = 0;
2472         /*
2473          * Always leave one descriptor unallocated to avoid
2474          * an H/W race condition.
2475          */
2476         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2477         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2478         txq->ctx_curr = 0;
2479         memset((void *)&txq->ctx_cache, 0,
2480                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2481 }
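
/*
 * Worked example of the S/W ring linkage built above (illustrative only):
 * with nb_tx_desc = 4 the loop produces
 *
 *     txe[3].next_id = 0, txe[0].next_id = 1,
 *     txe[1].next_id = 2, txe[2].next_id = 3,
 *
 * i.e. a circular list matching the H/W ring, with every descriptor's DD
 * bit pre-set so the first transmissions see the whole ring as free.
 */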
2482
2483 static const struct ixgbe_txq_ops def_txq_ops = {
2484         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2485         .free_swring = ixgbe_tx_free_swring,
2486         .reset = ixgbe_reset_tx_queue,
2487 };
2488
2489 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2490  * the queue parameters. Used in tx_queue_setup by primary process and then
2491  * in dev_init by secondary process when attaching to an existing ethdev.
2492  */
2493 void __attribute__((cold))
2494 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2495 {
2496         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2497         if ((txq->offloads == 0) &&
2498 #ifdef RTE_LIBRTE_SECURITY
2499                         !(txq->using_ipsec) &&
2500 #endif
2501                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2502                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2503                 dev->tx_pkt_prepare = NULL;
2504                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2505                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2506                                         ixgbe_txq_vec_setup(txq) == 0)) {
2507                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2508                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2509                 } else
2510                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2511         } else {
2512                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2513                 PMD_INIT_LOG(DEBUG,
2514                                 " - offloads = 0x%" PRIx64,
2515                                 txq->offloads);
2516                 PMD_INIT_LOG(DEBUG,
2517                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2518                                 (unsigned long)txq->tx_rs_thresh,
2519                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2520                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2521                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2522         }
2523 }
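
/*
 * Selection sketch for the dispatch above (illustrative only): with no Tx
 * offloads (and no inline IPsec when RTE_LIBRTE_SECURITY is compiled in)
 * and tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST, the simple path is
 * chosen; within it the vector burst is used when tx_rs_thresh also fits
 * RTE_IXGBE_TX_MAX_FREE_BUF_SZ and ixgbe_txq_vec_setup() succeeds (or the
 * process is secondary), otherwise ixgbe_xmit_pkts_simple() runs.  Any
 * other configuration gets the full-featured ixgbe_xmit_pkts() together
 * with ixgbe_prep_pkts() as the prepare hook.
 */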
2524
2525 uint64_t
2526 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2527 {
2528         RTE_SET_USED(dev);
2529
2530         return 0;
2531 }
2532
2533 uint64_t
2534 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2535 {
2536         uint64_t tx_offload_capa;
2537         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2538
2539         tx_offload_capa =
2540                 DEV_TX_OFFLOAD_VLAN_INSERT |
2541                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2542                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2543                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2544                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2545                 DEV_TX_OFFLOAD_TCP_TSO     |
2546                 DEV_TX_OFFLOAD_MULTI_SEGS;
2547
2548         if (hw->mac.type == ixgbe_mac_82599EB ||
2549             hw->mac.type == ixgbe_mac_X540)
2550                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2551
2552         if (hw->mac.type == ixgbe_mac_X550 ||
2553             hw->mac.type == ixgbe_mac_X550EM_x ||
2554             hw->mac.type == ixgbe_mac_X550EM_a)
2555                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2556
2557 #ifdef RTE_LIBRTE_SECURITY
2558         if (dev->security_ctx)
2559                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2560 #endif
2561         return tx_offload_capa;
2562 }
2563
2564 int __attribute__((cold))
2565 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2566                          uint16_t queue_idx,
2567                          uint16_t nb_desc,
2568                          unsigned int socket_id,
2569                          const struct rte_eth_txconf *tx_conf)
2570 {
2571         const struct rte_memzone *tz;
2572         struct ixgbe_tx_queue *txq;
2573         struct ixgbe_hw     *hw;
2574         uint16_t tx_rs_thresh, tx_free_thresh;
2575         uint64_t offloads;
2576
2577         PMD_INIT_FUNC_TRACE();
2578         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2579
2580         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2581
2582         /*
2583          * Validate number of transmit descriptors.
2584          * It must not exceed the hardware maximum and must be a multiple
2585          * of IXGBE_TXD_ALIGN.
2586          */
2587         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2588                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2589                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2590                 return -EINVAL;
2591         }
2592
2593         /*
2594          * The following two parameters control the setting of the RS bit on
2595          * transmit descriptors.
2596          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2597          * descriptors have been used.
2598          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2599          * descriptors are used or if the number of descriptors required
2600          * to transmit a packet is greater than the number of free TX
2601          * descriptors.
2602          * The following constraints must be satisfied:
2603          *  tx_rs_thresh must be greater than 0.
2604          *  tx_rs_thresh must be less than the size of the ring minus 2.
2605          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2606          *  tx_rs_thresh must be a divisor of the ring size.
2607          *  tx_free_thresh must be greater than 0.
2608          *  tx_free_thresh must be less than the size of the ring minus 3.
2609          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2610          * One descriptor in the TX ring is used as a sentinel to avoid a
2611          * H/W race condition, hence the maximum threshold constraints.
2612          * When set to zero use default values.
2613          */
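        /*
         * Worked example (assuming the ixgbe_rxtx.h defaults of
         * DEFAULT_TX_FREE_THRESH = 32 and DEFAULT_TX_RS_THRESH = 32): with
         * nb_desc = 512 and both tx_conf thresholds left at 0, the code
         * below picks tx_free_thresh = 32 and tx_rs_thresh = 32, which
         * satisfies every constraint listed above (32 > 0, 32 < 510,
         * 32 <= 32, 512 % 32 == 0, 32 < 509, 32 + 32 <= 512).
         */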
2614         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2615                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2616         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2617         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2618                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2619         if (tx_conf->tx_rs_thresh > 0)
2620                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2621         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2622                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2623                              "exceed nb_desc. (tx_rs_thresh=%u "
2624                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2625                              (unsigned int)tx_rs_thresh,
2626                              (unsigned int)tx_free_thresh,
2627                              (unsigned int)nb_desc,
2628                              (int)dev->data->port_id,
2629                              (int)queue_idx);
2630                 return -(EINVAL);
2631         }
2632         if (tx_rs_thresh >= (nb_desc - 2)) {
2633                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2634                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2635                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2636                         (int)dev->data->port_id, (int)queue_idx);
2637                 return -(EINVAL);
2638         }
2639         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2640                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2641                         "(tx_rs_thresh=%u port=%d queue=%d)",
2642                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2643                         (int)dev->data->port_id, (int)queue_idx);
2644                 return -(EINVAL);
2645         }
2646         if (tx_free_thresh >= (nb_desc - 3)) {
2647                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2648                              "number of TX descriptors minus 3. "
2649                              "(tx_free_thresh=%u "
2650                              "port=%d queue=%d)",
2651                              (unsigned int)tx_free_thresh,
2652                              (int)dev->data->port_id, (int)queue_idx);
2653                 return -(EINVAL);
2654         }
2655         if (tx_rs_thresh > tx_free_thresh) {
2656                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2657                              "tx_free_thresh. (tx_free_thresh=%u "
2658                              "tx_rs_thresh=%u port=%d queue=%d)",
2659                              (unsigned int)tx_free_thresh,
2660                              (unsigned int)tx_rs_thresh,
2661                              (int)dev->data->port_id,
2662                              (int)queue_idx);
2663                 return -(EINVAL);
2664         }
2665         if ((nb_desc % tx_rs_thresh) != 0) {
2666                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2667                              "number of TX descriptors. (tx_rs_thresh=%u "
2668                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2669                              (int)dev->data->port_id, (int)queue_idx);
2670                 return -(EINVAL);
2671         }
2672
2673         /*
2674          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2675          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2676          * by the NIC and all descriptors are written back after the NIC
2677          * accumulates WTHRESH descriptors.
2678          */
2679         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2680                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2681                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2682                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2683                              (int)dev->data->port_id, (int)queue_idx);
2684                 return -(EINVAL);
2685         }
2686
2687         /* Free memory prior to re-allocation if needed... */
2688         if (dev->data->tx_queues[queue_idx] != NULL) {
2689                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2690                 dev->data->tx_queues[queue_idx] = NULL;
2691         }
2692
2693         /* First allocate the tx queue data structure */
2694         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2695                                  RTE_CACHE_LINE_SIZE, socket_id);
2696         if (txq == NULL)
2697                 return -ENOMEM;
2698
2699         /*
2700          * Allocate TX ring hardware descriptors. A memzone large enough to
2701          * handle the maximum ring size is allocated in order to allow for
2702          * resizing in later calls to the queue setup function.
2703          */
2704         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2705                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2706                         IXGBE_ALIGN, socket_id);
2707         if (tz == NULL) {
2708                 ixgbe_tx_queue_release(txq);
2709                 return -ENOMEM;
2710         }
2711
2712         txq->nb_tx_desc = nb_desc;
2713         txq->tx_rs_thresh = tx_rs_thresh;
2714         txq->tx_free_thresh = tx_free_thresh;
2715         txq->pthresh = tx_conf->tx_thresh.pthresh;
2716         txq->hthresh = tx_conf->tx_thresh.hthresh;
2717         txq->wthresh = tx_conf->tx_thresh.wthresh;
2718         txq->queue_id = queue_idx;
2719         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2720                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2721         txq->port_id = dev->data->port_id;
2722         txq->offloads = offloads;
2723         txq->ops = &def_txq_ops;
2724         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2725 #ifdef RTE_LIBRTE_SECURITY
2726         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2727                         DEV_TX_OFFLOAD_SECURITY);
2728 #endif
2729
2730         /*
2731          * Use VFTDT as the Tx tail register when a virtual function is detected
2732          */
2733         if (hw->mac.type == ixgbe_mac_82599_vf ||
2734             hw->mac.type == ixgbe_mac_X540_vf ||
2735             hw->mac.type == ixgbe_mac_X550_vf ||
2736             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2737             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2738                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2739         else
2740                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2741
2742         txq->tx_ring_phys_addr = tz->iova;
2743         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2744
2745         /* Allocate software ring */
2746         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2747                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2748                                 RTE_CACHE_LINE_SIZE, socket_id);
2749         if (txq->sw_ring == NULL) {
2750                 ixgbe_tx_queue_release(txq);
2751                 return -ENOMEM;
2752         }
2753         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2754                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2755
2756         /* set up vector or scalar TX function as appropriate */
2757         ixgbe_set_tx_function(dev, txq);
2758
2759         txq->ops->reset(txq);
2760
2761         dev->data->tx_queues[queue_idx] = txq;
2762
2763
2764         return 0;
2765 }
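
/*
 * Illustrative usage (a sketch with assumed values, not part of the
 * driver): an application configures this queue through the generic ethdev
 * call, which lands here via the dev_ops table:
 *
 *     struct rte_eth_txconf txconf = { .tx_rs_thresh = 32,
 *                                      .tx_free_thresh = 32 };
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  &txconf);
 *
 * Passing NULL instead of &txconf lets the ethdev layer fall back to the
 * defaults this PMD reports in dev_info.default_txconf.
 */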
2766
2767 /**
2768  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2769  *
2770  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2771  * in the sw_sc_ring is not set to NULL but rather points to the next
2772  * mbuf of this RSC aggregation (that has not been completed yet and still
2773  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2774  * just free the first "nb_segs" segments of the cluster explicitly by
2775  * calling rte_pktmbuf_free_seg().
2776  *
2777  * @m scattered cluster head
2778  */
2779 static void __attribute__((cold))
2780 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2781 {
2782         uint16_t i, nb_segs = m->nb_segs;
2783         struct rte_mbuf *next_seg;
2784
2785         for (i = 0; i < nb_segs; i++) {
2786                 next_seg = m->next;
2787                 rte_pktmbuf_free_seg(m);
2788                 m = next_seg;
2789         }
2790 }
2791
2792 static void __attribute__((cold))
2793 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2794 {
2795         unsigned i;
2796
2797         /* SSE Vector driver has a different way of releasing mbufs. */
2798         if (rxq->rx_using_sse) {
2799                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2800                 return;
2801         }
2802
2803         if (rxq->sw_ring != NULL) {
2804                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2805                         if (rxq->sw_ring[i].mbuf != NULL) {
2806                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2807                                 rxq->sw_ring[i].mbuf = NULL;
2808                         }
2809                 }
2810                 if (rxq->rx_nb_avail) {
2811                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2812                                 struct rte_mbuf *mb;
2813
2814                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2815                                 rte_pktmbuf_free_seg(mb);
2816                         }
2817                         rxq->rx_nb_avail = 0;
2818                 }
2819         }
2820
2821         if (rxq->sw_sc_ring)
2822                 for (i = 0; i < rxq->nb_rx_desc; i++)
2823                         if (rxq->sw_sc_ring[i].fbuf) {
2824                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2825                                 rxq->sw_sc_ring[i].fbuf = NULL;
2826                         }
2827 }
2828
2829 static void __attribute__((cold))
2830 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2831 {
2832         if (rxq != NULL) {
2833                 ixgbe_rx_queue_release_mbufs(rxq);
2834                 rte_free(rxq->sw_ring);
2835                 rte_free(rxq->sw_sc_ring);
2836                 rte_free(rxq);
2837         }
2838 }
2839
2840 void __attribute__((cold))
2841 ixgbe_dev_rx_queue_release(void *rxq)
2842 {
2843         ixgbe_rx_queue_release(rxq);
2844 }
2845
2846 /*
2847  * Check if Rx Burst Bulk Alloc function can be used.
2848  * Return
2849  *        0: the preconditions are satisfied and the bulk allocation function
2850  *           can be used.
2851  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2852  *           function must be used.
2853  */
2854 static inline int __attribute__((cold))
2855 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2856 {
2857         int ret = 0;
2858
2859         /*
2860          * Make sure the following pre-conditions are satisfied:
2861          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2862          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2863          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2864          * Scattered packets are not supported.  This should be checked
2865          * outside of this function.
2866          */
2867         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2868                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2869                              "rxq->rx_free_thresh=%d, "
2870                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2871                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2872                 ret = -EINVAL;
2873         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2874                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2875                              "rxq->rx_free_thresh=%d, "
2876                              "rxq->nb_rx_desc=%d",
2877                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2878                 ret = -EINVAL;
2879         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2880                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2881                              "rxq->nb_rx_desc=%d, "
2882                              "rxq->rx_free_thresh=%d",
2883                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2884                 ret = -EINVAL;
2885         }
2886
2887         return ret;
2888 }
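
/*
 * Worked example (RTE_PMD_IXGBE_RX_MAX_BURST is assumed to be 32, as in
 * ixgbe_rxtx.h): a queue with nb_rx_desc = 512 and rx_free_thresh = 32
 * passes all three checks (32 >= 32, 32 < 512, 512 % 32 == 0), whereas
 * rx_free_thresh = 48 fails the divisor check (512 % 48 != 0) and, per the
 * setup code below, disables bulk allocation for the whole port.
 */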
2889
2890 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2891 static void __attribute__((cold))
2892 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2893 {
2894         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2895         unsigned i;
2896         uint16_t len = rxq->nb_rx_desc;
2897
2898         /*
2899          * By default, the Rx queue setup function allocates enough memory for
2900          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2901          * extra memory at the end of the descriptor ring to be zeroed out.
2902          */
2903         if (adapter->rx_bulk_alloc_allowed)
2904                 /* zero out extra memory */
2905                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2906
2907         /*
2908          * Zero out HW ring memory. Zero out extra memory at the end of
2909          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2910          * reads extra memory as zeros.
2911          */
2912         for (i = 0; i < len; i++) {
2913                 rxq->rx_ring[i] = zeroed_desc;
2914         }
2915
2916         /*
2917          * initialize extra software ring entries. Space for these extra
2918          * entries is always allocated
2919          */
2920         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2921         for (i = rxq->nb_rx_desc; i < len; ++i) {
2922                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2923         }
2924
2925         rxq->rx_nb_avail = 0;
2926         rxq->rx_next_avail = 0;
2927         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2928         rxq->rx_tail = 0;
2929         rxq->nb_rx_hold = 0;
2930         rxq->pkt_first_seg = NULL;
2931         rxq->pkt_last_seg = NULL;
2932
2933 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2934         rxq->rxrearm_start = 0;
2935         rxq->rxrearm_nb = 0;
2936 #endif
2937 }
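
/*
 * Illustrative note: the extra entries zeroed and pointed at &rxq->fake_mbuf
 * above let the bulk-alloc Rx burst look ahead past the last real descriptor
 * without extra branches; the zeroed descriptors never have DD set, so the
 * look-ahead simply reports "not done" there.
 */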
2938
2939 static int
2940 ixgbe_is_vf(struct rte_eth_dev *dev)
2941 {
2942         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2943
2944         switch (hw->mac.type) {
2945         case ixgbe_mac_82599_vf:
2946         case ixgbe_mac_X540_vf:
2947         case ixgbe_mac_X550_vf:
2948         case ixgbe_mac_X550EM_x_vf:
2949         case ixgbe_mac_X550EM_a_vf:
2950                 return 1;
2951         default:
2952                 return 0;
2953         }
2954 }
2955
2956 uint64_t
2957 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2958 {
2959         uint64_t offloads = 0;
2960         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2961
2962         if (hw->mac.type != ixgbe_mac_82598EB)
2963                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2964
2965         return offloads;
2966 }
2967
2968 uint64_t
2969 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2970 {
2971         uint64_t offloads;
2972         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2973
2974         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2975                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2976                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2977                    DEV_RX_OFFLOAD_KEEP_CRC    |
2978                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2979                    DEV_RX_OFFLOAD_VLAN_FILTER |
2980                    DEV_RX_OFFLOAD_SCATTER |
2981                    DEV_RX_OFFLOAD_RSS_HASH;
2982
2983         if (hw->mac.type == ixgbe_mac_82598EB)
2984                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2985
2986         if (ixgbe_is_vf(dev) == 0)
2987                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2988
2989         /*
2990          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2991          * mode.
2992          */
2993         if ((hw->mac.type == ixgbe_mac_82599EB ||
2994              hw->mac.type == ixgbe_mac_X540 ||
2995              hw->mac.type == ixgbe_mac_X550) &&
2996             !RTE_ETH_DEV_SRIOV(dev).active)
2997                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2998
2999         if (hw->mac.type == ixgbe_mac_82599EB ||
3000             hw->mac.type == ixgbe_mac_X540)
3001                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3002
3003         if (hw->mac.type == ixgbe_mac_X550 ||
3004             hw->mac.type == ixgbe_mac_X550EM_x ||
3005             hw->mac.type == ixgbe_mac_X550EM_a)
3006                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3007
3008 #ifdef RTE_LIBRTE_SECURITY
3009         if (dev->security_ctx)
3010                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3011 #endif
3012
3013         return offloads;
3014 }
3015
3016 int __attribute__((cold))
3017 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3018                          uint16_t queue_idx,
3019                          uint16_t nb_desc,
3020                          unsigned int socket_id,
3021                          const struct rte_eth_rxconf *rx_conf,
3022                          struct rte_mempool *mp)
3023 {
3024         const struct rte_memzone *rz;
3025         struct ixgbe_rx_queue *rxq;
3026         struct ixgbe_hw     *hw;
3027         uint16_t len;
3028         struct ixgbe_adapter *adapter = dev->data->dev_private;
3029         uint64_t offloads;
3030
3031         PMD_INIT_FUNC_TRACE();
3032         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3033
3034         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3035
3036         /*
3037          * Validate number of receive descriptors.
3038          * It must not exceed the hardware maximum and must be a multiple
3039          * of IXGBE_RXD_ALIGN.
3040          */
3041         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3042                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3043                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3044                 return -EINVAL;
3045         }
3046
3047         /* Free memory prior to re-allocation if needed... */
3048         if (dev->data->rx_queues[queue_idx] != NULL) {
3049                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3050                 dev->data->rx_queues[queue_idx] = NULL;
3051         }
3052
3053         /* First allocate the rx queue data structure */
3054         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3055                                  RTE_CACHE_LINE_SIZE, socket_id);
3056         if (rxq == NULL)
3057                 return -ENOMEM;
3058         rxq->mb_pool = mp;
3059         rxq->nb_rx_desc = nb_desc;
3060         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3061         rxq->queue_id = queue_idx;
3062         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3063                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3064         rxq->port_id = dev->data->port_id;
3065         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3066                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3067         else
3068                 rxq->crc_len = 0;
3069         rxq->drop_en = rx_conf->rx_drop_en;
3070         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3071         rxq->offloads = offloads;
3072
3073         /*
3074          * The packet type in RX descriptor is different for different NICs.
3075          * Some bits are used for x550 but reserved for other NICs.
3076          * So set different masks for different NICs.
3077          */
3078         if (hw->mac.type == ixgbe_mac_X550 ||
3079             hw->mac.type == ixgbe_mac_X550EM_x ||
3080             hw->mac.type == ixgbe_mac_X550EM_a ||
3081             hw->mac.type == ixgbe_mac_X550_vf ||
3082             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3083             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3084                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3085         else
3086                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3087
3088         /*
3089          * Allocate RX ring hardware descriptors. A memzone large enough to
3090          * handle the maximum ring size is allocated in order to allow for
3091          * resizing in later calls to the queue setup function.
3092          */
3093         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3094                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3095         if (rz == NULL) {
3096                 ixgbe_rx_queue_release(rxq);
3097                 return -ENOMEM;
3098         }
3099
3100         /*
3101          * Zero init all the descriptors in the ring.
3102          */
3103         memset(rz->addr, 0, RX_RING_SZ);
3104
3105         /*
3106          * Use the VF ring registers (VFRDT/VFRDH) when a virtual function is detected
3107          */
3108         if (hw->mac.type == ixgbe_mac_82599_vf ||
3109             hw->mac.type == ixgbe_mac_X540_vf ||
3110             hw->mac.type == ixgbe_mac_X550_vf ||
3111             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3112             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3113                 rxq->rdt_reg_addr =
3114                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3115                 rxq->rdh_reg_addr =
3116                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3117         } else {
3118                 rxq->rdt_reg_addr =
3119                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3120                 rxq->rdh_reg_addr =
3121                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3122         }
3123
3124         rxq->rx_ring_phys_addr = rz->iova;
3125         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3126
3127         /*
3128          * Certain constraints must be met in order to use the bulk buffer
3129          * allocation Rx burst function. If any of the Rx queues doesn't meet
3130          * them, the feature should be disabled for the whole port.
3131          */
3132         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3133                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3134                                     "preconditions - canceling the feature for "
3135                                     "the whole port[%d]",
3136                              rxq->queue_id, rxq->port_id);
3137                 adapter->rx_bulk_alloc_allowed = false;
3138         }
3139
3140         /*
3141          * Allocate software ring. Allow for space at the end of the
3142          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3143          * function does not access an invalid memory region.
3144          */
3145         len = nb_desc;
3146         if (adapter->rx_bulk_alloc_allowed)
3147                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3148
3149         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3150                                           sizeof(struct ixgbe_rx_entry) * len,
3151                                           RTE_CACHE_LINE_SIZE, socket_id);
3152         if (!rxq->sw_ring) {
3153                 ixgbe_rx_queue_release(rxq);
3154                 return -ENOMEM;
3155         }
3156
3157         /*
3158          * Always allocate even if it's not going to be needed in order to
3159          * simplify the code.
3160          *
3161          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3162          * be requested in ixgbe_dev_rx_init(), which is called later from
3163          * dev_start() flow.
3164          */
3165         rxq->sw_sc_ring =
3166                 rte_zmalloc_socket("rxq->sw_sc_ring",
3167                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3168                                    RTE_CACHE_LINE_SIZE, socket_id);
3169         if (!rxq->sw_sc_ring) {
3170                 ixgbe_rx_queue_release(rxq);
3171                 return -ENOMEM;
3172         }
3173
3174         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3175                             "dma_addr=0x%"PRIx64,
3176                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3177                      rxq->rx_ring_phys_addr);
3178
3179         if (!rte_is_power_of_2(nb_desc)) {
3180                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3181                                     "preconditions - canceling the feature for "
3182                                     "the whole port[%d]",
3183                              rxq->queue_id, rxq->port_id);
3184                 adapter->rx_vec_allowed = false;
3185         } else
3186                 ixgbe_rxq_vec_setup(rxq);
3187
3188         dev->data->rx_queues[queue_idx] = rxq;
3189
3190         ixgbe_reset_rx_queue(adapter, rxq);
3191
3192         return 0;
3193 }
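
/*
 * Illustrative usage (a sketch with assumed values, not part of the
 * driver):
 *
 *     struct rte_eth_rxconf rxconf = { .rx_free_thresh = 32 };
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  &rxconf, mbuf_pool);
 *
 * nb_desc = 512 is a power of two, so the vector Rx path stays available;
 * a non-power-of-two ring size such as 480 clears adapter->rx_vec_allowed
 * in the code above.
 */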
3194
3195 uint32_t
3196 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3197 {
3198 #define IXGBE_RXQ_SCAN_INTERVAL 4
3199         volatile union ixgbe_adv_rx_desc *rxdp;
3200         struct ixgbe_rx_queue *rxq;
3201         uint32_t desc = 0;
3202
3203         rxq = dev->data->rx_queues[rx_queue_id];
3204         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3205
3206         while ((desc < rxq->nb_rx_desc) &&
3207                 (rxdp->wb.upper.status_error &
3208                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3209                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3210                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3211                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3212                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3213                                 desc - rxq->nb_rx_desc]);
3214         }
3215
3216         return desc;
3217 }
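
/*
 * Note on granularity (illustrative only): the scan above advances in steps
 * of IXGBE_RXQ_SCAN_INTERVAL, so the returned count is a multiple of 4 and
 * may differ from the exact number of completed descriptors by up to
 * IXGBE_RXQ_SCAN_INTERVAL - 1 in either direction.
 */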
3218
3219 int
3220 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3221 {
3222         volatile union ixgbe_adv_rx_desc *rxdp;
3223         struct ixgbe_rx_queue *rxq = rx_queue;
3224         uint32_t desc;
3225
3226         if (unlikely(offset >= rxq->nb_rx_desc))
3227                 return 0;
3228         desc = rxq->rx_tail + offset;
3229         if (desc >= rxq->nb_rx_desc)
3230                 desc -= rxq->nb_rx_desc;
3231
3232         rxdp = &rxq->rx_ring[desc];
3233         return !!(rxdp->wb.upper.status_error &
3234                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3235 }
3236
3237 int
3238 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3239 {
3240         struct ixgbe_rx_queue *rxq = rx_queue;
3241         volatile uint32_t *status;
3242         uint32_t nb_hold, desc;
3243
3244         if (unlikely(offset >= rxq->nb_rx_desc))
3245                 return -EINVAL;
3246
3247 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3248         if (rxq->rx_using_sse)
3249                 nb_hold = rxq->rxrearm_nb;
3250         else
3251 #endif
3252                 nb_hold = rxq->nb_rx_hold;
3253         if (offset >= rxq->nb_rx_desc - nb_hold)
3254                 return RTE_ETH_RX_DESC_UNAVAIL;
3255
3256         desc = rxq->rx_tail + offset;
3257         if (desc >= rxq->nb_rx_desc)
3258                 desc -= rxq->nb_rx_desc;
3259
3260         status = &rxq->rx_ring[desc].wb.upper.status_error;
3261         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3262                 return RTE_ETH_RX_DESC_DONE;
3263
3264         return RTE_ETH_RX_DESC_AVAIL;
3265 }
3266
3267 int
3268 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3269 {
3270         struct ixgbe_tx_queue *txq = tx_queue;
3271         volatile uint32_t *status;
3272         uint32_t desc;
3273
3274         if (unlikely(offset >= txq->nb_tx_desc))
3275                 return -EINVAL;
3276
3277         desc = txq->tx_tail + offset;
3278         /* go to next desc that has the RS bit */
3279         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3280                 txq->tx_rs_thresh;
3281         if (desc >= txq->nb_tx_desc) {
3282                 desc -= txq->nb_tx_desc;
3283                 if (desc >= txq->nb_tx_desc)
3284                         desc -= txq->nb_tx_desc;
3285         }
3286
3287         status = &txq->tx_ring[desc].wb.status;
3288         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3289                 return RTE_ETH_TX_DESC_DONE;
3290
3291         return RTE_ETH_TX_DESC_FULL;
3292 }
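
/*
 * Worked example of the rounding above (illustrative only): with
 * tx_rs_thresh = 32, tx_tail = 40 and offset = 30, desc starts at 70 and is
 * rounded up to 96.  Status is therefore only tracked at tx_rs_thresh
 * granularity, because the DD bit is written back once per RS batch rather
 * than per descriptor.
 */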
3293
3294 /*
3295  * Set up link loopback (Tx->Rx) for X540/X550 devices.
3296  */
3297 static inline void __attribute__((cold))
3298 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3299 {
3300         uint32_t macc;
3301         PMD_INIT_FUNC_TRACE();
3302
3303         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3304
3305         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3306                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3307         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3308
3309         if (enable) {
3310                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3311                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3312                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3313                 macc |= IXGBE_MACC_FLU;
3314         } else {
3315                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3316                 macc &= ~IXGBE_MACC_FLU;
3317         }
3318
3319         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3320                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3321
3322         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3323 }
3324
3325 void __attribute__((cold))
3326 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3327 {
3328         unsigned i;
3329         struct ixgbe_adapter *adapter = dev->data->dev_private;
3330         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3331
3332         PMD_INIT_FUNC_TRACE();
3333
3334         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3335                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3336
3337                 if (txq != NULL) {
3338                         txq->ops->release_mbufs(txq);
3339                         txq->ops->reset(txq);
3340                 }
3341         }
3342
3343         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3344                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3345
3346                 if (rxq != NULL) {
3347                         ixgbe_rx_queue_release_mbufs(rxq);
3348                         ixgbe_reset_rx_queue(adapter, rxq);
3349                 }
3350         }
3351         /* If loopback mode was enabled, reconfigure the link accordingly */
3352         if (dev->data->dev_conf.lpbk_mode != 0) {
3353                 if (hw->mac.type == ixgbe_mac_X540 ||
3354                      hw->mac.type == ixgbe_mac_X550 ||
3355                      hw->mac.type == ixgbe_mac_X550EM_x ||
3356                      hw->mac.type == ixgbe_mac_X550EM_a)
3357                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3358         }
3359 }
3360
3361 void
3362 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3363 {
3364         unsigned i;
3365
3366         PMD_INIT_FUNC_TRACE();
3367
3368         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3369                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3370                 dev->data->rx_queues[i] = NULL;
3371         }
3372         dev->data->nb_rx_queues = 0;
3373
3374         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3375                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3376                 dev->data->tx_queues[i] = NULL;
3377         }
3378         dev->data->nb_tx_queues = 0;
3379 }
3380
3381 /*********************************************************************
3382  *
3383  *  Device RX/TX init functions
3384  *
3385  **********************************************************************/
3386
3387 /**
3388  * Receive Side Scaling (RSS)
3389  * See section 7.1.2.8 in the following document:
3390  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3391  *
3392  * Principles:
3393  * The source and destination IP addresses of the IP header and the source
3394  * and destination ports of TCP/UDP headers, if any, of received packets are
3395  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3396  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3397  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3398  * RSS output index, which is used as the RX queue index in which to store the
3399  * received packets.
3400  * The following output is supplied in the RX write-back descriptor:
3401  *     - 32-bit result of the Microsoft RSS hash function,
3402  *     - 4-bit RSS type field.
3403  */
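
/*
 * Worked example (illustrative only): for a packet whose RSS hash computes
 * to 0x1234ABCD, the RETA index is the low seven bits,
 * 0xCD & 0x7F = 0x4D = 77, and RETA entry 77 supplies the RX queue the
 * packet is steered to.  The full 32-bit hash value itself is delivered in
 * the Rx write-back descriptor, as noted above.
 */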
3404
3405 /*
3406  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3407  * Used as the default key.
3408  */
3409 static uint8_t rss_intel_key[40] = {
3410         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3411         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3412         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3413         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3414         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3415 };
3416
3417 static void
3418 ixgbe_rss_disable(struct rte_eth_dev *dev)
3419 {
3420         struct ixgbe_hw *hw;
3421         uint32_t mrqc;
3422         uint32_t mrqc_reg;
3423
3424         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3425         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3426         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3427         mrqc &= ~IXGBE_MRQC_RSSEN;
3428         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3429 }
3430
3431 static void
3432 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3433 {
3434         uint8_t  *hash_key;
3435         uint32_t mrqc;
3436         uint32_t rss_key;
3437         uint64_t rss_hf;
3438         uint16_t i;
3439         uint32_t mrqc_reg;
3440         uint32_t rssrk_reg;
3441
3442         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3443         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3444
3445         hash_key = rss_conf->rss_key;
3446         if (hash_key != NULL) {
3447                 /* Fill in RSS hash key */
3448                 for (i = 0; i < 10; i++) {
3449                         rss_key  = hash_key[(i * 4)];
3450                         rss_key |= hash_key[(i * 4) + 1] << 8;
3451                         rss_key |= hash_key[(i * 4) + 2] << 16;
3452                         rss_key |= hash_key[(i * 4) + 3] << 24;
3453                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3454                 }
3455         }
3456
3457         /* Set configured hashing protocols in MRQC register */
3458         rss_hf = rss_conf->rss_hf;
3459         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3460         if (rss_hf & ETH_RSS_IPV4)
3461                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3462         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3463                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3464         if (rss_hf & ETH_RSS_IPV6)
3465                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3466         if (rss_hf & ETH_RSS_IPV6_EX)
3467                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3468         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3469                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3470         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3471                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3472         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3473                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3474         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3475                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3476         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3477                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3478         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3479 }
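
/*
 * Worked example of the key packing above (illustrative only): with the
 * default rss_intel_key, the first register write is
 *
 *     RSSRK[0] = 0x6D | (0x5A << 8) | (0x56 << 16) | (0xDA << 24)
 *              = 0xDA565A6D
 *
 * i.e. each group of four key bytes is packed little-endian into one 32-bit
 * RSSRK register.
 */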
3480
3481 int
3482 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3483                           struct rte_eth_rss_conf *rss_conf)
3484 {
3485         struct ixgbe_hw *hw;
3486         uint32_t mrqc;
3487         uint64_t rss_hf;
3488         uint32_t mrqc_reg;
3489
3490         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3491
3492         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3493                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3494                         "NIC.");
3495                 return -ENOTSUP;
3496         }
3497         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3498
3499         /*
3500          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3501          *     "RSS enabling cannot be done dynamically while it must be
3502          *      preceded by a software reset"
3503          * Before changing anything, first check that the update RSS operation
3504          * does not attempt to disable RSS, if RSS was enabled at
3505          * initialization time, or does not attempt to enable RSS, if RSS was
3506          * disabled at initialization time.
3507          */
3508         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3509         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3510         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3511                 if (rss_hf != 0) /* Enable RSS */
3512                         return -(EINVAL);
3513                 return 0; /* Nothing to do */
3514         }
3515         /* RSS enabled */
3516         if (rss_hf == 0) /* Disable RSS */
3517                 return -(EINVAL);
3518         ixgbe_hw_rss_hash_set(hw, rss_conf);
3519         return 0;
3520 }
3521
3522 int
3523 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3524                             struct rte_eth_rss_conf *rss_conf)
3525 {
3526         struct ixgbe_hw *hw;
3527         uint8_t *hash_key;
3528         uint32_t mrqc;
3529         uint32_t rss_key;
3530         uint64_t rss_hf;
3531         uint16_t i;
3532         uint32_t mrqc_reg;
3533         uint32_t rssrk_reg;
3534
3535         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3536         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3537         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3538         hash_key = rss_conf->rss_key;
3539         if (hash_key != NULL) {
3540                 /* Return RSS hash key */
3541                 for (i = 0; i < 10; i++) {
3542                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3543                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3544                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3545                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3546                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3547                 }
3548         }
3549
3550         /* Get RSS functions configured in MRQC register */
3551         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3552         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3553                 rss_conf->rss_hf = 0;
3554                 return 0;
3555         }
3556         rss_hf = 0;
3557         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3558                 rss_hf |= ETH_RSS_IPV4;
3559         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3560                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3561         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3562                 rss_hf |= ETH_RSS_IPV6;
3563         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3564                 rss_hf |= ETH_RSS_IPV6_EX;
3565         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3566                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3567         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3568                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3569         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3570                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3571         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3572                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3573         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3574                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3575         rss_conf->rss_hf = rss_hf;
3576         return 0;
3577 }
3578
3579 static void
3580 ixgbe_rss_configure(struct rte_eth_dev *dev)
3581 {
3582         struct rte_eth_rss_conf rss_conf;
3583         struct ixgbe_adapter *adapter;
3584         struct ixgbe_hw *hw;
3585         uint32_t reta;
3586         uint16_t i;
3587         uint16_t j;
3588         uint16_t sp_reta_size;
3589         uint32_t reta_reg;
3590
3591         PMD_INIT_FUNC_TRACE();
3592         adapter = dev->data->dev_private;
3593         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3594
3595         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3596
3597         /*
3598          * Fill in redirection table
3599          * The byte-swap is needed because NIC registers are in
3600          * little-endian order.
3601          */
3602         if (adapter->rss_reta_updated == 0) {
3603                 reta = 0;
3604                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3605                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3606
3607                         if (j == dev->data->nb_rx_queues)
3608                                 j = 0;
3609                         reta = (reta << 8) | j;
3610                         if ((i & 3) == 3)
3611                                 IXGBE_WRITE_REG(hw, reta_reg,
3612                                                 rte_bswap32(reta));
3613                 }
3614         }
3615
3616         /*
3617          * Configure the RSS key and the RSS protocols used to compute
3618          * the RSS hash of input packets.
3619          */
3620         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3621         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3622                 ixgbe_rss_disable(dev);
3623                 return;
3624         }
3625         if (rss_conf.rss_key == NULL)
3626                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3627         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3628 }
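
/*
 * Worked example of the RETA fill above (illustrative only): with
 * nb_rx_queues = 4, j cycles 0,1,2,3,0,1,... so each group of four entries
 * packs into one register as ((0 << 24) | (1 << 16) | (2 << 8) | 3) before
 * the rte_bswap32(), spreading the 128 RETA entries evenly across the four
 * queues.
 */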
3629
3630 #define NUM_VFTA_REGISTERS 128
3631 #define NIC_RX_BUFFER_SIZE 0x200
3632 #define X550_RX_BUFFER_SIZE 0x180
3633
3634 static void
3635 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3636 {
3637         struct rte_eth_vmdq_dcb_conf *cfg;
3638         struct ixgbe_hw *hw;
3639         enum rte_eth_nb_pools num_pools;
3640         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3641         uint16_t pbsize;
3642         uint8_t nb_tcs; /* number of traffic classes */
3643         int i;
3644
3645         PMD_INIT_FUNC_TRACE();
3646         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3647         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3648         num_pools = cfg->nb_queue_pools;
3649         /* Check we have a valid number of pools */
3650         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3651                 ixgbe_rss_disable(dev);
3652                 return;
3653         }
3654         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3655         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3656
3657         /*
3658          * RXPBSIZE
3659          * split rx buffer up into sections, each for 1 traffic class
3660          */
3661         switch (hw->mac.type) {
3662         case ixgbe_mac_X550:
3663         case ixgbe_mac_X550EM_x:
3664         case ixgbe_mac_X550EM_a:
3665                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3666                 break;
3667         default:
3668                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3669                 break;
3670         }
3671         for (i = 0; i < nb_tcs; i++) {
3672                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3673
3674                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3675                 /* clear 10 bits. */
3676                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3677                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3678         }
3679         /* zero alloc all unused TCs */
3680         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3681                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3682
3683                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3684                 /* clear 10 bits. */
3685                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3686         }
3687
3688         /* MRQC: enable vmdq and dcb */
3689         mrqc = (num_pools == ETH_16_POOLS) ?
3690                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3691         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3692
3693         /* PFVTCTL: turn on virtualisation and set the default pool */
3694         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3695         if (cfg->enable_default_pool) {
3696                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3697         } else {
3698                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3699         }
3700
3701         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3702
3703         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3704         queue_mapping = 0;
3705         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3706                 /*
3707                  * mapping is done with 3 bits per priority,
3708                  * so shift by i*3 each time
3709                  */
3710                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3711
3712         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3713
3714         /* RTRPCS: DCB related */
3715         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3716
3717         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3718         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3719         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3720         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3721
3722         /* VFTA - enable all vlan filters */
3723         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3724                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3725         }
3726
3727         /* VFRE: pool enabling for receive - 16 or 32 */
3728         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3729                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3730
3731         /*
3732          * MPSAR - allow pools to read specific mac addresses
3733          * In this case, all pools should be able to read from mac addr 0
3734          */
3735         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3736         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3737
3738         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3739         for (i = 0; i < cfg->nb_pool_maps; i++) {
3740                 /* set vlan id in VF register and set the valid bit */
3741                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3742                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3743                 /*
3744                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3745                  * pools, we only need to use the first half of the register
3746                  * i.e. bits 0-31
3747                  */
3748                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3749         }
3750 }
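
/*
 * Worked arithmetic for the buffer split above (illustrative only): with 16
 * pools the comment above gives nb_tcs = 8, so a non-X550 device splits its
 * Rx packet buffer as pbsize = NIC_RX_BUFFER_SIZE / 8 = 0x200 / 8 = 0x40
 * per traffic class across RXPBSIZE[0..7].  With 32 pools (nb_tcs = 4) only
 * RXPBSIZE[0..3] get 0x80 each and the remaining entries are cleared.
 */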
3751
3752 /**
3753  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3754  * @dev: pointer to eth_dev structure
3755  * @dcb_config: pointer to ixgbe_dcb_config structure
3756  */
3757 static void
3758 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3759                        struct ixgbe_dcb_config *dcb_config)
3760 {
3761         uint32_t reg;
3762         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3763
3764         PMD_INIT_FUNC_TRACE();
3765         if (hw->mac.type != ixgbe_mac_82598EB) {
3766                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3767                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3768                 reg |= IXGBE_RTTDCS_ARBDIS;
3769                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3770
3771                 /* Enable DCB for Tx with 8 TCs */
3772                 if (dcb_config->num_tcs.pg_tcs == 8) {
3773                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3774                 } else {
3775                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3776                 }
3777                 if (dcb_config->vt_mode)
3778                         reg |= IXGBE_MTQC_VT_ENA;
3779                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3780
3781                 /* Enable the Tx desc arbiter */
3782                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3783                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3784                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3785
3786                 /* Enable Security TX Buffer IFG for DCB */
3787                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3788                 reg |= IXGBE_SECTX_DCB;
3789                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3790         }
3791 }
3792
3793 /**
3794  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3795  * @dev: pointer to rte_eth_dev structure
3796  * @dcb_config: pointer to ixgbe_dcb_config structure
3797  */
3798 static void
3799 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3800                         struct ixgbe_dcb_config *dcb_config)
3801 {
3802         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3803                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3804         struct ixgbe_hw *hw =
3805                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3806
3807         PMD_INIT_FUNC_TRACE();
3808         if (hw->mac.type != ixgbe_mac_82598EB)
3809                 /*PF VF Transmit Enable*/
3810                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3811                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3812
3813         /*Configure general DCB TX parameters*/
3814         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3815 }
3816
3817 static void
3818 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3819                         struct ixgbe_dcb_config *dcb_config)
3820 {
3821         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3822                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3823         struct ixgbe_dcb_tc_config *tc;
3824         uint8_t i, j;
3825
3826         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3827         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3828                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3829                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3830         } else {
3831                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3832                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3833         }
3834
3835         /* Initialize User Priority to Traffic Class mapping */
3836         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3837                 tc = &dcb_config->tc_config[j];
3838                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3839         }
3840
3841         /* User Priority to Traffic Class mapping */
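             /* Bit i of up_to_tc_bitmap marks user priority i as mapped to this TC */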
3842         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3843                 j = vmdq_rx_conf->dcb_tc[i];
3844                 tc = &dcb_config->tc_config[j];
3845                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3846                                                 (uint8_t)(1 << i);
3847         }
3848 }
3849
3850 static void
3851 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3852                         struct ixgbe_dcb_config *dcb_config)
3853 {
3854         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3855                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3856         struct ixgbe_dcb_tc_config *tc;
3857         uint8_t i, j;
3858
3859         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3860         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3861                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3862                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3863         } else {
3864                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3865                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3866         }
3867
3868         /* Initialize User Priority to Traffic Class mapping */
3869         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3870                 tc = &dcb_config->tc_config[j];
3871                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3872         }
3873
3874         /* User Priority to Traffic Class mapping */
3875         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3876                 j = vmdq_tx_conf->dcb_tc[i];
3877                 tc = &dcb_config->tc_config[j];
3878                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3879                                                 (uint8_t)(1 << i);
3880         }
3881 }
3882
3883 static void
3884 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3885                 struct ixgbe_dcb_config *dcb_config)
3886 {
3887         struct rte_eth_dcb_rx_conf *rx_conf =
3888                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3889         struct ixgbe_dcb_tc_config *tc;
3890         uint8_t i, j;
3891
3892         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3893         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3894
3895         /* Initialize User Priority to Traffic Class mapping */
3896         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3897                 tc = &dcb_config->tc_config[j];
3898                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3899         }
3900
3901         /* User Priority to Traffic Class mapping */
3902         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3903                 j = rx_conf->dcb_tc[i];
3904                 tc = &dcb_config->tc_config[j];
3905                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3906                                                 (uint8_t)(1 << i);
3907         }
3908 }
3909
3910 static void
3911 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3912                 struct ixgbe_dcb_config *dcb_config)
3913 {
3914         struct rte_eth_dcb_tx_conf *tx_conf =
3915                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3916         struct ixgbe_dcb_tc_config *tc;
3917         uint8_t i, j;
3918
3919         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3920         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3921
3922         /* Initialize User Priority to Traffic Class mapping */
3923         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3924                 tc = &dcb_config->tc_config[j];
3925                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3926         }
3927
3928         /* User Priority to Traffic Class mapping */
3929         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3930                 j = tx_conf->dcb_tc[i];
3931                 tc = &dcb_config->tc_config[j];
3932                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3933                                                 (uint8_t)(1 << i);
3934         }
3935 }
3936
3937 /**
3938  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3939  * @dev: pointer to eth_dev structure
3940  * @dcb_config: pointer to ixgbe_dcb_config structure
3941  */
3942 static void
3943 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3944                        struct ixgbe_dcb_config *dcb_config)
3945 {
3946         uint32_t reg;
3947         uint32_t vlanctrl;
3948         uint8_t i;
3949         uint32_t q;
3950         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3951
3952         PMD_INIT_FUNC_TRACE();
3953         /*
3954          * Disable the arbiter before changing parameters
3955          * (always enable recycle mode; WSP)
3956          */
3957         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3958         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3959
3960         if (hw->mac.type != ixgbe_mac_82598EB) {
3961                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3962                 if (dcb_config->num_tcs.pg_tcs == 4) {
3963                         if (dcb_config->vt_mode)
3964                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3965                                         IXGBE_MRQC_VMDQRT4TCEN;
3966                         else {
3967                                 /* no matter the mode is DCB or DCB_RSS, just
3968                                 /* Whether the mode is DCB or DCB_RSS, just
3969                                  * set MRQE to RSSXTCEN; RSS itself is controlled
3970                                  * by the RSS_FIELD bits
3971                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3972                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3973                                         IXGBE_MRQC_RTRSS4TCEN;
3974                         }
3975                 }
3976                 if (dcb_config->num_tcs.pg_tcs == 8) {
3977                         if (dcb_config->vt_mode)
3978                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3979                                         IXGBE_MRQC_VMDQRT8TCEN;
3980                         else {
3981                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3982                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3983                                         IXGBE_MRQC_RTRSS8TCEN;
3984                         }
3985                 }
3986
3987                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3988
3989                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3990                         /* Disable drop for all queues in VMDQ mode*/
3991                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3992                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3993                                                 (IXGBE_QDE_WRITE |
3994                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3995                 } else {
3996                         /* Enable drop for all queues in SRIOV mode */
3997                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3998                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3999                                                 (IXGBE_QDE_WRITE |
4000                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4001                                                  IXGBE_QDE_ENABLE));
4002                 }
4003         }
4004
4005         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4006         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4007         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4008         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4009
4010         /* VFTA - enable all vlan filters */
4011         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4012                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4013         }
4014
4015         /*
4016          * Configure Rx packet plane (recycle mode; WSP) and
4017          * enable arbiter
4018          */
4019         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4020         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4021 }
4022
4023 static void
4024 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4025                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4026 {
4027         switch (hw->mac.type) {
4028         case ixgbe_mac_82598EB:
4029                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4030                 break;
4031         case ixgbe_mac_82599EB:
4032         case ixgbe_mac_X540:
4033         case ixgbe_mac_X550:
4034         case ixgbe_mac_X550EM_x:
4035         case ixgbe_mac_X550EM_a:
4036                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4037                                                   tsa, map);
4038                 break;
4039         default:
4040                 break;
4041         }
4042 }
4043
4044 static void
4045 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4046                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4047 {
4048         switch (hw->mac.type) {
4049         case ixgbe_mac_82598EB:
4050                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4051                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4052                 break;
4053         case ixgbe_mac_82599EB:
4054         case ixgbe_mac_X540:
4055         case ixgbe_mac_X550:
4056         case ixgbe_mac_X550EM_x:
4057         case ixgbe_mac_X550EM_a:
4058                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4059                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4060                 break;
4061         default:
4062                 break;
4063         }
4064 }
4065
4066 #define DCB_RX_CONFIG  1
4067 #define DCB_TX_CONFIG  1
4068 #define DCB_TX_PB      1024
4069 /**
4070  * ixgbe_dcb_hw_configure - Enable DCB and configure
4071  * general DCB parameters in both VT mode and non-VT mode
4072  * @dev: pointer to rte_eth_dev structure
4073  * @dcb_config: pointer to ixgbe_dcb_config structure
4074  */
4075 static int
4076 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4077                         struct ixgbe_dcb_config *dcb_config)
4078 {
4079         int     ret = 0;
4080         uint8_t i, pfc_en, nb_tcs;
4081         uint16_t pbsize, rx_buffer_size;
4082         uint8_t config_dcb_rx = 0;
4083         uint8_t config_dcb_tx = 0;
4084         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4085         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4086         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4087         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4088         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4089         struct ixgbe_dcb_tc_config *tc;
4090         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4091                 RTE_ETHER_CRC_LEN;
4092         struct ixgbe_hw *hw =
4093                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4094         struct ixgbe_bw_conf *bw_conf =
4095                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4096
4097         switch (dev->data->dev_conf.rxmode.mq_mode) {
4098         case ETH_MQ_RX_VMDQ_DCB:
4099                 dcb_config->vt_mode = true;
4100                 if (hw->mac.type != ixgbe_mac_82598EB) {
4101                         config_dcb_rx = DCB_RX_CONFIG;
4102                         /*
4103                          * Get DCB and VT RX configuration parameters
4104                          * from rte_eth_conf
4105                          */
4106                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4107                         /*Configure general VMDQ and DCB RX parameters*/
4108                         ixgbe_vmdq_dcb_configure(dev);
4109                 }
4110                 break;
4111         case ETH_MQ_RX_DCB:
4112         case ETH_MQ_RX_DCB_RSS:
4113                 dcb_config->vt_mode = false;
4114                 config_dcb_rx = DCB_RX_CONFIG;
4115                 /* Get DCB RX configuration parameters from rte_eth_conf */
4116                 ixgbe_dcb_rx_config(dev, dcb_config);
4117                 /*Configure general DCB RX parameters*/
4118                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4119                 break;
4120         default:
4121                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4122                 break;
4123         }
4124         switch (dev->data->dev_conf.txmode.mq_mode) {
4125         case ETH_MQ_TX_VMDQ_DCB:
4126                 dcb_config->vt_mode = true;
4127                 config_dcb_tx = DCB_TX_CONFIG;
4128                 /* get DCB and VT TX configuration parameters
4129                  * from rte_eth_conf
4130                  */
4131                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4132                 /*Configure general VMDQ and DCB TX parameters*/
4133                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4134                 break;
4135
4136         case ETH_MQ_TX_DCB:
4137                 dcb_config->vt_mode = false;
4138                 config_dcb_tx = DCB_TX_CONFIG;
4139                 /*get DCB TX configuration parameters from rte_eth_conf*/
4140                 ixgbe_dcb_tx_config(dev, dcb_config);
4141                 /*Configure general DCB TX parameters*/
4142                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4143                 break;
4144         default:
4145                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4146                 break;
4147         }
4148
4149         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4150         /* Unpack map */
4151         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
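             /* map[i] now holds the traffic class assigned to user priority i */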
4152         if (nb_tcs == ETH_4_TCS) {
4153                 /* Avoid un-configured priority mapping to TC0 */
4154                 uint8_t j = 4;
4155                 uint8_t mask = 0xFF;
4156
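                     /*
                      * mask ends up with a bit set for every TC not used by
                      * priorities 0-3; the loop below spreads the remaining
                      * priorities (4-7) across those unused TCs instead of
                      * leaving them all on TC0.
                      */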
4157                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4158                         mask = (uint8_t)(mask & (~(1 << map[i])));
4159                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4160                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4161                                 map[j++] = i;
4162                         mask >>= 1;
4163                 }
4164                 /* Re-configure 4 TCs BW */
4165                 for (i = 0; i < nb_tcs; i++) {
4166                         tc = &dcb_config->tc_config[i];
4167                         if (bw_conf->tc_num != nb_tcs)
4168                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4169                                         (uint8_t)(100 / nb_tcs);
4170                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4171                                                 (uint8_t)(100 / nb_tcs);
4172                 }
4173                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4174                         tc = &dcb_config->tc_config[i];
4175                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4176                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4177                 }
4178         } else {
4179                 /* Re-configure 8 TCs BW */
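                     /* With 8 TCs, 100 / 8 = 12; odd-indexed TCs get the extra 1%
                      * so the shares sum to 100 (4 * 12 + 4 * 13).
                      */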
4180                 for (i = 0; i < nb_tcs; i++) {
4181                         tc = &dcb_config->tc_config[i];
4182                         if (bw_conf->tc_num != nb_tcs)
4183                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4184                                         (uint8_t)(100 / nb_tcs + (i & 1));
4185                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4186                                 (uint8_t)(100 / nb_tcs + (i & 1));
4187                 }
4188         }
4189
4190         switch (hw->mac.type) {
4191         case ixgbe_mac_X550:
4192         case ixgbe_mac_X550EM_x:
4193         case ixgbe_mac_X550EM_a:
4194                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4195                 break;
4196         default:
4197                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4198                 break;
4199         }
4200
4201         if (config_dcb_rx) {
4202                 /* Set RX buffer size */
4203                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4204                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4205
4206                 for (i = 0; i < nb_tcs; i++) {
4207                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4208                 }
4209                 /* zero alloc all unused TCs */
4210                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4211                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4212                 }
4213         }
4214         if (config_dcb_tx) {
4215                 /* Only support an equally distributed
4216                  *  Tx packet buffer strategy.
4217                  */
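                     /* TXPBTHRESH is programmed in KB, hence the DCB_TX_PB (1024)
                      * divisor; subtracting IXGBE_TXPKT_SIZE_MAX leaves headroom
                      * for one maximum-size packet.
                      */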
4218                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4219                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4220
4221                 for (i = 0; i < nb_tcs; i++) {
4222                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4223                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4224                 }
4225                 /* Clear unused TCs, if any, to zero buffer size */
4226                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4227                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4228                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4229                 }
4230         }
4231
4232         /*Calculates traffic class credits*/
4233         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4234                                 IXGBE_DCB_TX_CONFIG);
4235         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4236                                 IXGBE_DCB_RX_CONFIG);
4237
4238         if (config_dcb_rx) {
4239                 /* Unpack CEE standard containers */
4240                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4241                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4242                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4243                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4244                 /* Configure PG(ETS) RX */
4245                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4246         }
4247
4248         if (config_dcb_tx) {
4249                 /* Unpack CEE standard containers */
4250                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4251                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4252                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4253                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4254                 /* Configure PG(ETS) TX */
4255                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4256         }
4257
4258         /*Configure queue statistics registers*/
4259         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4260
4261         /* Check if the PFC is supported */
4262         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4263                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4264                 for (i = 0; i < nb_tcs; i++) {
4265                         /*
4266                         * Set high_water to 3/4 and low_water to 1/4 of the
4267                         * per-TC Rx buffer (48 and 16 by default with 8 TCs).
4268                         */
4269                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4270                         hw->fc.low_water[i] = pbsize / 4;
4271                         /* Enable pfc for this TC */
4272                         tc = &dcb_config->tc_config[i];
4273                         tc->pfc = ixgbe_dcb_pfc_enabled;
4274                 }
4275                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4276                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4277                         pfc_en &= 0x0F;
4278                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4279         }
4280
4281         return ret;
4282 }
4283
4284 /**
4285  * ixgbe_configure_dcb - Configure DCB hardware
4286  * @dev: pointer to rte_eth_dev
4287  */
4288 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4289 {
4290         struct ixgbe_dcb_config *dcb_cfg =
4291                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4292         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4293
4294         PMD_INIT_FUNC_TRACE();
4295
4296         /* check that the configured mq_mode is supported for DCB */
4297         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4298             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4299             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4300                 return;
4301
4302         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4303                 return;
4304
4305         /** Configure DCB hardware **/
4306         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4307 }
4308
4309 /*
4310  * VMDq is only supported on 10 GbE NICs.
4311  */
4312 static void
4313 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4314 {
4315         struct rte_eth_vmdq_rx_conf *cfg;
4316         struct ixgbe_hw *hw;
4317         enum rte_eth_nb_pools num_pools;
4318         uint32_t mrqc, vt_ctl, vlanctrl;
4319         uint32_t vmolr = 0;
4320         int i;
4321
4322         PMD_INIT_FUNC_TRACE();
4323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4324         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4325         num_pools = cfg->nb_queue_pools;
4326
4327         ixgbe_rss_disable(dev);
4328
4329         /* MRQC: enable vmdq */
4330         mrqc = IXGBE_MRQC_VMDQEN;
4331         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4332
4333         /* PFVTCTL: turn on virtualisation and set the default pool */
4334         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4335         if (cfg->enable_default_pool)
4336                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4337         else
4338                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4339
4340         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4341
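             /* VMOLR: per-pool Rx mode (e.g. broadcast/multicast acceptance) taken
              * from cfg->rx_mode
              */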
4342         for (i = 0; i < (int)num_pools; i++) {
4343                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4344                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4345         }
4346
4347         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4348         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4349         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4350         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4351
4352         /* VFTA - enable all vlan filters */
4353         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4354                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4355
4356         /* VFRE: enable receive for all pools (up to 64) */
4357         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4358         if (num_pools == ETH_64_POOLS)
4359                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4360
4361         /*
4362          * MPSAR - allow pools to read specific mac addresses
4363          * In this case, all pools should be able to read from mac addr 0
4364          */
4365         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4366         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4367
4368         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4369         for (i = 0; i < cfg->nb_pool_maps; i++) {
4370                 /* set vlan id in VF register and set the valid bit */
4371                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4372                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4373                 /*
4374                  * Put the allowed pools in the VFB register pair: bits 0-31
4375                  * live in VLVFB(i*2) and bits 32-63 in VLVFB(i*2+1). Write
4376                  * whichever half holds the configured pools.
4377                  */
4378                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4379                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4380                                         (cfg->pool_map[i].pools & UINT32_MAX));
4381                 else
4382                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4383                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4384
4385         }
4386
4387         /* PFDMA Tx General Switch Control: enable VMDq loopback */
4388         if (cfg->enable_loop_back) {
4389                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4390                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4391                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4392         }
4393
4394         IXGBE_WRITE_FLUSH(hw);
4395 }
4396
4397 /*
4398  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4399  * @hw: pointer to hardware structure
4400  */
4401 static void
4402 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4403 {
4404         uint32_t reg;
4405         uint32_t q;
4406
4407         PMD_INIT_FUNC_TRACE();
4408         /*PF VF Transmit Enable*/
4409         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4410         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4411
4412         /* Disable the Tx desc arbiter so that MTQC can be changed */
4413         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4414         reg |= IXGBE_RTTDCS_ARBDIS;
4415         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4416
4417         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4418         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4419
4420         /* Disable drop for all queues */
4421         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4422                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4423                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4424
4425         /* Enable the Tx desc arbiter */
4426         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4427         reg &= ~IXGBE_RTTDCS_ARBDIS;
4428         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4429
4430         IXGBE_WRITE_FLUSH(hw);
4431 }
4432
4433 static int __attribute__((cold))
4434 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4435 {
4436         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4437         uint64_t dma_addr;
4438         unsigned int i;
4439
4440         /* Initialize software ring entries */
4441         for (i = 0; i < rxq->nb_rx_desc; i++) {
4442                 volatile union ixgbe_adv_rx_desc *rxd;
4443                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4444
4445                 if (mbuf == NULL) {
4446                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4447                                      (unsigned) rxq->queue_id);
4448                         return -ENOMEM;
4449                 }
4450
4451                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4452                 mbuf->port = rxq->port_id;
4453
4454                 dma_addr =
4455                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4456                 rxd = &rxq->rx_ring[i];
4457                 rxd->read.hdr_addr = 0;
4458                 rxd->read.pkt_addr = dma_addr;
4459                 rxe[i].mbuf = mbuf;
4460         }
4461
4462         return 0;
4463 }
4464
4465 static int
4466 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4467 {
4468         struct ixgbe_hw *hw;
4469         uint32_t mrqc;
4470
4471         ixgbe_rss_configure(dev);
4472
4473         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4474
4475         /* MRQC: enable VF RSS */
4476         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4477         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4478         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4479         case ETH_64_POOLS:
4480                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4481                 break;
4482
4483         case ETH_32_POOLS:
4484                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4485                 break;
4486
4487         default:
4488                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4489                 return -EINVAL;
4490         }
4491
4492         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4493
4494         return 0;
4495 }
4496
4497 static int
4498 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4499 {
4500         struct ixgbe_hw *hw =
4501                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4502
4503         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4504         case ETH_64_POOLS:
4505                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4506                         IXGBE_MRQC_VMDQEN);
4507                 break;
4508
4509         case ETH_32_POOLS:
4510                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4511                         IXGBE_MRQC_VMDQRT4TCEN);
4512                 break;
4513
4514         case ETH_16_POOLS:
4515                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4516                         IXGBE_MRQC_VMDQRT8TCEN);
4517                 break;
4518         default:
4519                 PMD_INIT_LOG(ERR,
4520                         "invalid pool number in IOV mode");
4521                 break;
4522         }
4523         return 0;
4524 }
4525
4526 static int
4527 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4528 {
4529         struct ixgbe_hw *hw =
4530                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4531
4532         if (hw->mac.type == ixgbe_mac_82598EB)
4533                 return 0;
4534
4535         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4536                 /*
4537                  * SRIOV inactive scheme
4538                  * any DCB/RSS w/o VMDq multi-queue setting
4539                  */
4540                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4541                 case ETH_MQ_RX_RSS:
4542                 case ETH_MQ_RX_DCB_RSS:
4543                 case ETH_MQ_RX_VMDQ_RSS:
4544                         ixgbe_rss_configure(dev);
4545                         break;
4546
4547                 case ETH_MQ_RX_VMDQ_DCB:
4548                         ixgbe_vmdq_dcb_configure(dev);
4549                         break;
4550
4551                 case ETH_MQ_RX_VMDQ_ONLY:
4552                         ixgbe_vmdq_rx_hw_configure(dev);
4553                         break;
4554
4555                 case ETH_MQ_RX_NONE:
4556                 default:
4557                         /* if mq_mode is none, disable rss mode.*/
4558                         ixgbe_rss_disable(dev);
4559                         break;
4560                 }
4561         } else {
4562                 /* SRIOV active scheme
4563                  * Support RSS together with SRIOV.
4564                  */
4565                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4566                 case ETH_MQ_RX_RSS:
4567                 case ETH_MQ_RX_VMDQ_RSS:
4568                         ixgbe_config_vf_rss(dev);
4569                         break;
4570                 case ETH_MQ_RX_VMDQ_DCB:
4571                 case ETH_MQ_RX_DCB:
4572                 /* In SRIOV, the configuration is the same as VMDq case */
4573                         ixgbe_vmdq_dcb_configure(dev);
4574                         break;
4575                 /* DCB/RSS together with SRIOV is not supported */
4576                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4577                 case ETH_MQ_RX_DCB_RSS:
4578                         PMD_INIT_LOG(ERR,
4579                                 "Could not support DCB/RSS with VMDq & SRIOV");
4580                         return -1;
4581                 default:
4582                         ixgbe_config_vf_default(dev);
4583                         break;
4584                 }
4585         }
4586
4587         return 0;
4588 }
4589
4590 static int
4591 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4592 {
4593         struct ixgbe_hw *hw =
4594                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4595         uint32_t mtqc;
4596         uint32_t rttdcs;
4597
4598         if (hw->mac.type == ixgbe_mac_82598EB)
4599                 return 0;
4600
4601         /* disable arbiter before setting MTQC */
4602         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4603         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4604         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4605
4606         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4607                 /*
4608                  * SRIOV inactive scheme
4609                  * any DCB w/o VMDq multi-queue setting
4610                  */
4611                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4612                         ixgbe_vmdq_tx_hw_configure(hw);
4613                 else {
4614                         mtqc = IXGBE_MTQC_64Q_1PB;
4615                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4616                 }
4617         } else {
4618                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4619
4620                 /*
4621                  * SRIOV active scheme
4622                  * FIXME if support DCB together with VMDq & SRIOV
4623                  */
4624                 case ETH_64_POOLS:
4625                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4626                         break;
4627                 case ETH_32_POOLS:
4628                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4629                         break;
4630                 case ETH_16_POOLS:
4631                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4632                                 IXGBE_MTQC_8TC_8TQ;
4633                         break;
4634                 default:
4635                         mtqc = IXGBE_MTQC_64Q_1PB;
4636                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4637                 }
4638                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4639         }
4640
4641         /* re-enable arbiter */
4642         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4643         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4644
4645         return 0;
4646 }
4647
4648 /**
4649  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4650  *
4651  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4652  * spec rev. 3.0 chapter 8.2.3.8.13.
4653  *
4654  * @pool Memory pool of the Rx queue
4655  */
4656 static inline uint32_t
4657 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4658 {
4659         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4660
4661         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4662         uint16_t maxdesc =
4663                 RTE_IPV4_MAX_PKT_LEN /
4664                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4665
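             /* Round down to the nearest MAXDESC value the hardware supports (16, 8, 4 or 1) */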
4666         if (maxdesc >= 16)
4667                 return IXGBE_RSCCTL_MAXDESC_16;
4668         else if (maxdesc >= 8)
4669                 return IXGBE_RSCCTL_MAXDESC_8;
4670         else if (maxdesc >= 4)
4671                 return IXGBE_RSCCTL_MAXDESC_4;
4672         else
4673                 return IXGBE_RSCCTL_MAXDESC_1;
4674 }
4675
4676 /**
4677  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4678  * interrupt
4679  *
4680  * (Taken from FreeBSD tree)
4681  * (yes this is all very magic and confusing :)
4682  *
4683  * @dev port handle
4684  * @entry the register array entry
4685  * @vector the MSIX vector for this queue
4686  * @type RX/TX/MISC
4687  */
4688 static void
4689 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4690 {
4691         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4692         u32 ivar, index;
4693
4694         vector |= IXGBE_IVAR_ALLOC_VAL;
4695
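             /* 82598: each IVAR register holds four 8-bit entries (index = entry >> 2,
              * byte = entry & 3). 82599/X540: each register covers two queues, with
              * separate 8-bit Rx and Tx fields selected by the shifts below.
              */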
4696         switch (hw->mac.type) {
4697
4698         case ixgbe_mac_82598EB:
4699                 if (type == -1)
4700                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4701                 else
4702                         entry += (type * 64);
4703                 index = (entry >> 2) & 0x1F;
4704                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4705                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4706                 ivar |= (vector << (8 * (entry & 0x3)));
4707                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4708                 break;
4709
4710         case ixgbe_mac_82599EB:
4711         case ixgbe_mac_X540:
4712                 if (type == -1) { /* MISC IVAR */
4713                         index = (entry & 1) * 8;
4714                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4715                         ivar &= ~(0xFF << index);
4716                         ivar |= (vector << index);
4717                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4718                 } else {        /* RX/TX IVARS */
4719                         index = (16 * (entry & 1)) + (8 * type);
4720                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4721                         ivar &= ~(0xFF << index);
4722                         ivar |= (vector << index);
4723                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4724                 }
4725
4726                 break;
4727
4728         default:
4729                 break;
4730         }
4731 }
4732
4733 void __attribute__((cold))
4734 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4735 {
4736         uint16_t i, rx_using_sse;
4737         struct ixgbe_adapter *adapter = dev->data->dev_private;
4738
4739         /*
4740          * Vector Rx can only be used if a few configuration
4741          * conditions are met and Rx Bulk Allocation is allowed.
4742          */
4743         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4744             !adapter->rx_bulk_alloc_allowed) {
4745                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4746                                     "preconditions",
4747                              dev->data->port_id);
4748
4749                 adapter->rx_vec_allowed = false;
4750         }
4751
4752         /*
4753          * Initialize the appropriate LRO callback.
4754          *
4755          * If all queues satisfy the bulk allocation preconditions
4756          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4757          * Otherwise use a single allocation version.
4758          */
4759         if (dev->data->lro) {
4760                 if (adapter->rx_bulk_alloc_allowed) {
4761                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4762                                            "allocation version");
4763                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4764                 } else {
4765                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4766                                            "allocation version");
4767                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4768                 }
4769         } else if (dev->data->scattered_rx) {
4770                 /*
4771                  * Set the non-LRO scattered callback: there are Vector,
4772                  * bulk allocation and single allocation versions.
4773                  */
4774                 if (adapter->rx_vec_allowed) {
4775                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4776                                             "callback (port=%d).",
4777                                      dev->data->port_id);
4778
4779                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4780                 } else if (adapter->rx_bulk_alloc_allowed) {
4781                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4782                                            "allocation callback (port=%d).",
4783                                      dev->data->port_id);
4784                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4785                 } else {
4786                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4787                                             "single allocation) "
4788                                             "Scattered Rx callback "
4789                                             "(port=%d).",
4790                                      dev->data->port_id);
4791
4792                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4793                 }
4794         /*
4795          * Below we set "simple" callbacks according to port/queues parameters.
4796          * If parameters allow we are going to choose between the following
4797          * callbacks:
4798          *    - Vector
4799          *    - Bulk Allocation
4800          *    - Single buffer allocation (the simplest one)
4801          */
4802         } else if (adapter->rx_vec_allowed) {
4803                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4804                                     "burst size no less than %d (port=%d).",
4805                              RTE_IXGBE_DESCS_PER_LOOP,
4806                              dev->data->port_id);
4807
4808                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4809         } else if (adapter->rx_bulk_alloc_allowed) {
4810                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4811                                     "satisfied. Rx Burst Bulk Alloc function "
4812                                     "will be used on port=%d.",
4813                              dev->data->port_id);
4814
4815                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4816         } else {
4817                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4818                                     "satisfied, or Scattered Rx is requested "
4819                                     "(port=%d).",
4820                              dev->data->port_id);
4821
4822                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4823         }
4824
4825         /* Propagate information about RX function choice through all queues. */
4826
4827         rx_using_sse =
4828                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4829                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4830
4831         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4832                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4833
4834                 rxq->rx_using_sse = rx_using_sse;
4835 #ifdef RTE_LIBRTE_SECURITY
4836                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4837                                 DEV_RX_OFFLOAD_SECURITY);
4838 #endif
4839         }
4840 }
4841
4842 /**
4843  * ixgbe_set_rsc - configure RSC related port HW registers
4844  *
4845  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4846  * of 82599 Spec (x540 configuration is virtually the same).
4847  *
4848  * @dev port handle
4849  *
4850  * Returns 0 in case of success or a non-zero error code
4851  */
4852 static int
4853 ixgbe_set_rsc(struct rte_eth_dev *dev)
4854 {
4855         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4856         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4857         struct rte_eth_dev_info dev_info = { 0 };
4858         bool rsc_capable = false;
4859         uint16_t i;
4860         uint32_t rdrxctl;
4861         uint32_t rfctl;
4862
4863         /* Sanity check */
4864         dev->dev_ops->dev_infos_get(dev, &dev_info);
4865         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4866                 rsc_capable = true;
4867
4868         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4869                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4870                                    "support it");
4871                 return -EINVAL;
4872         }
4873
4874         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4875
4876         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4877              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4878                 /*
4879                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4880                  * 3.0 RSC configuration requires HW CRC stripping being
4881                  * enabled. If user requested both HW CRC stripping off
4882                  * and RSC on - return an error.
4883                  */
4884                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4885                                     "is disabled");
4886                 return -EINVAL;
4887         }
4888
4889         /* RFCTL configuration  */
4890         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4891         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4892                 /*
4893                  * Since NFS packet coalescing is not supported, clear
4894                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4895                  * enabled.
4896                  */
4897                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4898                            IXGBE_RFCTL_NFSR_DIS);
4899         else
4900                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4901         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4902
4903         /* If LRO hasn't been requested - we are done here. */
4904         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4905                 return 0;
4906
4907         /* Set RDRXCTL.RSCACKC bit */
4908         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4909         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4910         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4911
4912         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4913         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4914                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4915                 uint32_t srrctl =
4916                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4917                 uint32_t rscctl =
4918                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4919                 uint32_t psrtype =
4920                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4921                 uint32_t eitr =
4922                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4923
4924                 /*
4925                  * ixgbe PMD doesn't support header-split at the moment.
4926                  *
4927                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4928                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4929                  * should be configured even if header split is not
4930                  * enabled. We will configure it to 128 bytes following the
4931                  * recommendation in the spec.
4932                  */
4933                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4934                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4935                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4936
4937                 /*
4938                  * TODO: Consider setting the Receive Descriptor Minimum
4939                  * Threshold Size for the RSC case. This is not an obviously
4940                  * beneficial option, but one worth considering...
4941                  */
4942
4943                 rscctl |= IXGBE_RSCCTL_RSCEN;
4944                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4945                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4946
4947                 /*
4948                  * RSC: Set ITR interval corresponding to 2K ints/s.
4949                  *
4950                  * Full-sized RSC aggregations for a 10Gb/s link will
4951                  * arrive at a rate of about 20K aggregations/s.
4952                  *
4953                  * A 2K ints/s rate will cause only 10% of the
4954                  * aggregations to be closed due to the interrupt timer
4955                  * expiring when streaming at wire speed.
4956                  *
4957                  * For a sparse streaming case this setting will yield
4958                  * at most 500us latency for a single RSC aggregation.
4959                  */
4960                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4961                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4962                 eitr |= IXGBE_EITR_CNT_WDIS;
4963
4964                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4965                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4966                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4967                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4968
4969                 /*
4970                  * RSC requires the mapping of the queue to the
4971                  * interrupt vector.
4972                  */
4973                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4974         }
4975
4976         dev->data->lro = 1;
4977
4978         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4979
4980         return 0;
4981 }
4982
4983 /*
4984  * Initializes Receive Unit.
4985  */
4986 int __attribute__((cold))
4987 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4988 {
4989         struct ixgbe_hw     *hw;
4990         struct ixgbe_rx_queue *rxq;
4991         uint64_t bus_addr;
4992         uint32_t rxctrl;
4993         uint32_t fctrl;
4994         uint32_t hlreg0;
4995         uint32_t maxfrs;
4996         uint32_t srrctl;
4997         uint32_t rdrxctl;
4998         uint32_t rxcsum;
4999         uint16_t buf_size;
5000         uint16_t i;
5001         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5002         int rc;
5003
5004         PMD_INIT_FUNC_TRACE();
5005         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5006
5007         /*
5008          * Make sure receives are disabled while setting
5009          * up the RX context (registers, descriptor rings, etc.).
5010          */
5011         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5012         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5013
5014         /* Enable receipt of broadcast frames */
5015         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5016         fctrl |= IXGBE_FCTRL_BAM;
5017         fctrl |= IXGBE_FCTRL_DPF;
5018         fctrl |= IXGBE_FCTRL_PMCF;
5019         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5020
5021         /*
5022          * Configure CRC stripping, if any.
5023          */
5024         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5025         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5026                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5027         else
5028                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5029
5030         /*
5031          * Configure jumbo frame support, if any.
5032          */
5033         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5034                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
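                     /* MAXFRS keeps the maximum frame size in its upper 16 bits;
                      * preserve the lower half of the register.
                      */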
5035                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5036                 maxfrs &= 0x0000FFFF;
5037                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5038                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5039         } else
5040                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5041
5042         /*
5043          * If loopback mode is configured, set LPBK bit.
5044          */
5045         if (dev->data->dev_conf.lpbk_mode != 0) {
5046                 rc = ixgbe_check_supported_loopback_mode(dev);
5047                 if (rc < 0) {
5048                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5049                         return rc;
5050                 }
5051                 hlreg0 |= IXGBE_HLREG0_LPBK;
5052         } else {
5053                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5054         }
5055
5056         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5057
5058         /*
5059          * Assume no header split and no VLAN strip support
5060          * on any Rx queue first.
5061          */
5062         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5063         /* Setup RX queues */
5064         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5065                 rxq = dev->data->rx_queues[i];
5066
5067                 /*
5068                  * Reset crc_len in case it was changed after queue setup by a
5069                  * call to configure.
5070                  */
5071                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5072                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5073                 else
5074                         rxq->crc_len = 0;
5075
5076                 /* Setup the Base and Length of the Rx Descriptor Rings */
5077                 bus_addr = rxq->rx_ring_phys_addr;
5078                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5079                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5080                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5081                                 (uint32_t)(bus_addr >> 32));
5082                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5083                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5084                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5085                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5086
5087                 /* Configure the SRRCTL register */
5088                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5089
5090                 /* Set if packets are dropped when no descriptors available */
5091                 if (rxq->drop_en)
5092                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5093
5094                 /*
5095                  * Configure the RX buffer size in the BSIZEPACKET field of
5096                  * the SRRCTL register of the queue.
5097                  * The value is in 1 KB resolution. Valid values can be from
5098                  * 1 KB to 16 KB.
5099                  */
5100                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5101                         RTE_PKTMBUF_HEADROOM);
5102                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5103                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5104
5105                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5106
5107                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5108                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
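                     /* buf_size is now the value actually programmed (rounded down to
                      * 1 KB granularity); use it to decide whether scattered Rx is needed.
                      */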
5109
5110                 /* Add room for two VLAN tags (QinQ) when checking against the buffer size */
5111                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5112                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5113                         dev->data->scattered_rx = 1;
5114                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5115                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5116         }
5117
5118         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5119                 dev->data->scattered_rx = 1;
5120
5121         /*
5122          * Configure multi-queue Rx operation for the device.
5123          */
5124         ixgbe_dev_mq_rx_configure(dev);
5125
5126         /*
5127          * Set up the Checksum Register.
5128          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5129          * Enable IP/L4 checksum computation by hardware if requested to do so.
5130          */
5131         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5132         rxcsum |= IXGBE_RXCSUM_PCSD;
5133         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5134                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5135         else
5136                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5137
5138         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5139
5140         if (hw->mac.type == ixgbe_mac_82599EB ||
5141             hw->mac.type == ixgbe_mac_X540) {
5142                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5143                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5144                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5145                 else
5146                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5147                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5148                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5149         }
5150
5151         rc = ixgbe_set_rsc(dev);
5152         if (rc)
5153                 return rc;
5154
5155         ixgbe_set_rx_function(dev);
5156
5157         return 0;
5158 }
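
/*
 * Illustrative sketch only (not part of the upstream driver): it mirrors the
 * BSIZEPACKET / scattered-Rx arithmetic performed in ixgbe_dev_rx_init()
 * above.  The helper name is hypothetical.  The mbuf data room is rounded
 * down to a 1 KB multiple by the SRRCTL encoding; if the configured maximum
 * frame plus two VLAN tags does not fit into that rounded size, scattered Rx
 * has to be used.
 */
static inline int __rte_unused
ixgbe_example_needs_scattered_rx(struct rte_mempool *mb_pool,
                                 uint32_t max_rx_pkt_len)
{
        uint16_t room = (uint16_t)(rte_pktmbuf_data_room_size(mb_pool) -
                                   RTE_PKTMBUF_HEADROOM);
        /* 1 KB granularity, exactly as encoded into SRRCTL.BSIZEPACKET */
        uint16_t buf_size = (uint16_t)(((room >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                                        IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

        /* e.g. the default 2048 + 128 byte data room gives buf_size = 2048,
         * so a 9000 byte max_rx_pkt_len would force scattered Rx.
         */
        return (max_rx_pkt_len + 2 * IXGBE_VLAN_TAG_SIZE) > buf_size;
}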
5159
5160 /*
5161  * Initializes Transmit Unit.
5162  */
5163 void __attribute__((cold))
5164 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5165 {
5166         struct ixgbe_hw     *hw;
5167         struct ixgbe_tx_queue *txq;
5168         uint64_t bus_addr;
5169         uint32_t hlreg0;
5170         uint32_t txctrl;
5171         uint16_t i;
5172
5173         PMD_INIT_FUNC_TRACE();
5174         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5175
5176         /* Enable TX CRC (checksum offload requirement) and hw padding
5177          * (TSO requirement)
5178          */
5179         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5180         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5181         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5182
5183         /* Setup the Base and Length of the Tx Descriptor Rings */
5184         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5185                 txq = dev->data->tx_queues[i];
5186
5187                 bus_addr = txq->tx_ring_phys_addr;
5188                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5189                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5190                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5191                                 (uint32_t)(bus_addr >> 32));
5192                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5193                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5194                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5195                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5196                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5197
5198                 /*
5199                  * Disable the Tx head write-back relaxed ordering (RO) bit, since
5200                  * it breaks descriptor bookkeeping if completions arrive out of order.
5201                  */
5202                 switch (hw->mac.type) {
5203                 case ixgbe_mac_82598EB:
5204                         txctrl = IXGBE_READ_REG(hw,
5205                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5206                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5207                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5208                                         txctrl);
5209                         break;
5210
5211                 case ixgbe_mac_82599EB:
5212                 case ixgbe_mac_X540:
5213                 case ixgbe_mac_X550:
5214                 case ixgbe_mac_X550EM_x:
5215                 case ixgbe_mac_X550EM_a:
5216                 default:
5217                         txctrl = IXGBE_READ_REG(hw,
5218                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5219                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5220                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5221                                         txctrl);
5222                         break;
5223                 }
5224         }
5225
5226         /* Configure multi-queue Tx operation for the device. */
5227         ixgbe_dev_mq_tx_configure(dev);
5228 }
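
/*
 * Illustrative sketch only: the descriptor ring programming above splits a
 * 64-bit ring IOVA into the 32-bit low/high base address registers
 * (TDBAL/TDBAH, and likewise RDBAL/RDBAH on the Rx side).  The helper is
 * hypothetical and only demonstrates the split.
 */
static inline void __rte_unused
ixgbe_example_split_ring_addr(uint64_t bus_addr, uint32_t *lo, uint32_t *hi)
{
        /* Low 32 bits go into xDBAL, high 32 bits into xDBAH */
        *lo = (uint32_t)(bus_addr & 0x00000000ffffffffULL);
        *hi = (uint32_t)(bus_addr >> 32);
}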
5229
5230 /*
5231  * Check if requested loopback mode is supported
5232  */
5233 int
5234 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5235 {
5236         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5237
5238         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5239                 if (hw->mac.type == ixgbe_mac_82599EB ||
5240                      hw->mac.type == ixgbe_mac_X540 ||
5241                      hw->mac.type == ixgbe_mac_X550 ||
5242                      hw->mac.type == ixgbe_mac_X550EM_x ||
5243                      hw->mac.type == ixgbe_mac_X550EM_a)
5244                         return 0;
5245
5246         return -ENOTSUP;
5247 }
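
/*
 * Usage sketch (application side, not driver code): requesting the Tx->Rx
 * loopback mode that the check above accepts.  One Rx and one Tx queue are
 * assumed and error handling is omitted; the function name is hypothetical.
 */
static int __rte_unused
ixgbe_example_request_loopback(uint16_t port_id)
{
        struct rte_eth_conf conf;

        memset(&conf, 0, sizeof(conf));
        /* IXGBE_LPBK_TX_RX is the only loopback mode the PMD accepts, and
         * only on 82599/X540/X550 class MACs.
         */
        conf.lpbk_mode = IXGBE_LPBK_TX_RX;

        return rte_eth_dev_configure(port_id, 1, 1, &conf);
}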
5248
5249 /*
5250  * Set up link for 82599 loopback mode Tx->Rx.
5251  */
5252 static inline void __attribute__((cold))
5253 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5254 {
5255         PMD_INIT_FUNC_TRACE();
5256
5257         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5258                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5259                                 IXGBE_SUCCESS) {
5260                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5261                         /* ignore error */
5262                         return;
5263                 }
5264         }
5265
5266         /* Restart link */
5267         IXGBE_WRITE_REG(hw,
5268                         IXGBE_AUTOC,
5269                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5270         ixgbe_reset_pipeline_82599(hw);
5271
5272         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5273         msec_delay(50);
5274 }
5275
5276
5277 /*
5278  * Start Transmit and Receive Units.
5279  */
5280 int __attribute__((cold))
5281 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5282 {
5283         struct ixgbe_hw     *hw;
5284         struct ixgbe_tx_queue *txq;
5285         struct ixgbe_rx_queue *rxq;
5286         uint32_t txdctl;
5287         uint32_t dmatxctl;
5288         uint32_t rxctrl;
5289         uint16_t i;
5290         int ret = 0;
5291
5292         PMD_INIT_FUNC_TRACE();
5293         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5294
5295         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5296                 txq = dev->data->tx_queues[i];
5297                 /* Setup Transmit Threshold Registers */
5298                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5299                 txdctl |= txq->pthresh & 0x7F;
5300                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5301                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5302                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5303         }
5304
5305         if (hw->mac.type != ixgbe_mac_82598EB) {
5306                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5307                 dmatxctl |= IXGBE_DMATXCTL_TE;
5308                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5309         }
5310
5311         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5312                 txq = dev->data->tx_queues[i];
5313                 if (!txq->tx_deferred_start) {
5314                         ret = ixgbe_dev_tx_queue_start(dev, i);
5315                         if (ret < 0)
5316                                 return ret;
5317                 }
5318         }
5319
5320         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5321                 rxq = dev->data->rx_queues[i];
5322                 if (!rxq->rx_deferred_start) {
5323                         ret = ixgbe_dev_rx_queue_start(dev, i);
5324                         if (ret < 0)
5325                                 return ret;
5326                 }
5327         }
5328
5329         /* Enable Receive engine */
5330         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5331         if (hw->mac.type == ixgbe_mac_82598EB)
5332                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5333         rxctrl |= IXGBE_RXCTRL_RXEN;
5334         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5335
5336         /* If loopback mode is enabled, set up the link accordingly */
5337         if (dev->data->dev_conf.lpbk_mode != 0) {
5338                 if (hw->mac.type == ixgbe_mac_82599EB)
5339                         ixgbe_setup_loopback_link_82599(hw);
5340                 else if (hw->mac.type == ixgbe_mac_X540 ||
5341                      hw->mac.type == ixgbe_mac_X550 ||
5342                      hw->mac.type == ixgbe_mac_X550EM_x ||
5343                      hw->mac.type == ixgbe_mac_X550EM_a)
5344                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5345         }
5346
5347 #ifdef RTE_LIBRTE_SECURITY
5348         if ((dev->data->dev_conf.rxmode.offloads &
5349                         DEV_RX_OFFLOAD_SECURITY) ||
5350                 (dev->data->dev_conf.txmode.offloads &
5351                         DEV_TX_OFFLOAD_SECURITY)) {
5352                 ret = ixgbe_crypto_enable_ipsec(dev);
5353                 if (ret != 0) {
5354                         PMD_DRV_LOG(ERR,
5355                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5356                                     ret);
5357                         return ret;
5358                 }
5359         }
5360 #endif
5361
5362         return 0;
5363 }
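
/*
 * Illustrative sketch only: shows how the prefetch, host and write-back
 * thresholds are packed into TXDCTL by the loop above.  The helper name is
 * hypothetical.
 */
static inline uint32_t __rte_unused
ixgbe_example_pack_txdctl(uint32_t txdctl, uint8_t pthresh, uint8_t hthresh,
                          uint8_t wthresh)
{
        txdctl |= pthresh & 0x7F;                   /* PTHRESH, bits 6:0   */
        txdctl |= (uint32_t)(hthresh & 0x7F) << 8;  /* HTHRESH, bits 14:8  */
        txdctl |= (uint32_t)(wthresh & 0x7F) << 16; /* WTHRESH, bits 22:16 */
        return txdctl;
}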
5364
5365 /*
5366  * Start Receive Units for specified queue.
5367  */
5368 int __attribute__((cold))
5369 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5370 {
5371         struct ixgbe_hw     *hw;
5372         struct ixgbe_rx_queue *rxq;
5373         uint32_t rxdctl;
5374         int poll_ms;
5375
5376         PMD_INIT_FUNC_TRACE();
5377         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5378
5379         rxq = dev->data->rx_queues[rx_queue_id];
5380
5381         /* Allocate buffers for descriptor rings */
5382         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5383                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5384                              rx_queue_id);
5385                 return -1;
5386         }
5387         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5388         rxdctl |= IXGBE_RXDCTL_ENABLE;
5389         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5390
5391         /* Wait until RX Enable ready */
5392         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5393         do {
5394                 rte_delay_ms(1);
5395                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5396         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5397         if (!poll_ms)
5398                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5399         rte_wmb();
5400         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5401         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5402         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5403
5404         return 0;
5405 }
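
/*
 * Illustrative sketch only: the "poll until a bit reaches the wanted state"
 * pattern used by the queue start/stop helpers above and below, factored
 * out for clarity.  The helper name is hypothetical; the driver keeps these
 * loops inline.
 */
static inline int __rte_unused
ixgbe_example_poll_reg_bit(struct ixgbe_hw *hw, uint32_t reg, uint32_t bit,
                           int wait_for_set, int timeout_ms)
{
        uint32_t val;

        do {
                rte_delay_ms(1);
                val = IXGBE_READ_REG(hw, reg);
        } while (--timeout_ms &&
                 (wait_for_set ? !(val & bit) : (val & bit)));

        /* 0 on success, -1 if the bit never reached the wanted state */
        return timeout_ms ? 0 : -1;
}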
5406
5407 /*
5408  * Stop Receive Units for specified queue.
5409  */
5410 int __attribute__((cold))
5411 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5412 {
5413         struct ixgbe_hw     *hw;
5414         struct ixgbe_adapter *adapter = dev->data->dev_private;
5415         struct ixgbe_rx_queue *rxq;
5416         uint32_t rxdctl;
5417         int poll_ms;
5418
5419         PMD_INIT_FUNC_TRACE();
5420         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5421
5422         rxq = dev->data->rx_queues[rx_queue_id];
5423
5424         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5425         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5426         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5427
5428         /* Wait until the RX Enable bit is cleared */
5429         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5430         do {
5431                 rte_delay_ms(1);
5432                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5433         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5434         if (!poll_ms)
5435                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5436
5437         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5438
5439         ixgbe_rx_queue_release_mbufs(rxq);
5440         ixgbe_reset_rx_queue(adapter, rxq);
5441         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5442
5443         return 0;
5444 }
5445
5446
5447 /*
5448  * Start Transmit Units for specified queue.
5449  */
5450 int __attribute__((cold))
5451 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5452 {
5453         struct ixgbe_hw     *hw;
5454         struct ixgbe_tx_queue *txq;
5455         uint32_t txdctl;
5456         int poll_ms;
5457
5458         PMD_INIT_FUNC_TRACE();
5459         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5460
5461         txq = dev->data->tx_queues[tx_queue_id];
5462         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5463         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5464         txdctl |= IXGBE_TXDCTL_ENABLE;
5465         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5466
5467         /* Wait until TX Enable ready */
5468         if (hw->mac.type == ixgbe_mac_82599EB) {
5469                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5470                 do {
5471                         rte_delay_ms(1);
5472                         txdctl = IXGBE_READ_REG(hw,
5473                                 IXGBE_TXDCTL(txq->reg_idx));
5474                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5475                 if (!poll_ms)
5476                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5477                                 tx_queue_id);
5478         }
5479         rte_wmb();
5480         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5481         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5482
5483         return 0;
5484 }
5485
5486 /*
5487  * Stop Transmit Units for specified queue.
5488  */
5489 int __attribute__((cold))
5490 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5491 {
5492         struct ixgbe_hw     *hw;
5493         struct ixgbe_tx_queue *txq;
5494         uint32_t txdctl;
5495         uint32_t txtdh, txtdt;
5496         int poll_ms;
5497
5498         PMD_INIT_FUNC_TRACE();
5499         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5500
5501         txq = dev->data->tx_queues[tx_queue_id];
5502
5503         /* Wait until TX queue is empty */
5504         if (hw->mac.type == ixgbe_mac_82599EB) {
5505                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5506                 do {
5507                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5508                         txtdh = IXGBE_READ_REG(hw,
5509                                                IXGBE_TDH(txq->reg_idx));
5510                         txtdt = IXGBE_READ_REG(hw,
5511                                                IXGBE_TDT(txq->reg_idx));
5512                 } while (--poll_ms && (txtdh != txtdt));
5513                 if (!poll_ms)
5514                         PMD_INIT_LOG(ERR,
5515                                 "Tx Queue %d is not empty when stopping.",
5516                                 tx_queue_id);
5517         }
5518
5519         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5520         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5521         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5522
5523         /* Wait until the TX Enable bit is cleared */
5524         if (hw->mac.type == ixgbe_mac_82599EB) {
5525                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5526                 do {
5527                         rte_delay_ms(1);
5528                         txdctl = IXGBE_READ_REG(hw,
5529                                                 IXGBE_TXDCTL(txq->reg_idx));
5530                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5531                 if (!poll_ms)
5532                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5533                                 tx_queue_id);
5534         }
5535
5536         if (txq->ops != NULL) {
5537                 txq->ops->release_mbufs(txq);
5538                 txq->ops->reset(txq);
5539         }
5540         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5541
5542         return 0;
5543 }
5544
5545 void
5546 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5547         struct rte_eth_rxq_info *qinfo)
5548 {
5549         struct ixgbe_rx_queue *rxq;
5550
5551         rxq = dev->data->rx_queues[queue_id];
5552
5553         qinfo->mp = rxq->mb_pool;
5554         qinfo->scattered_rx = dev->data->scattered_rx;
5555         qinfo->nb_desc = rxq->nb_rx_desc;
5556
5557         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5558         qinfo->conf.rx_drop_en = rxq->drop_en;
5559         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5560         qinfo->conf.offloads = rxq->offloads;
5561 }
5562
5563 void
5564 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5565         struct rte_eth_txq_info *qinfo)
5566 {
5567         struct ixgbe_tx_queue *txq;
5568
5569         txq = dev->data->tx_queues[queue_id];
5570
5571         qinfo->nb_desc = txq->nb_tx_desc;
5572
5573         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5574         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5575         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5576
5577         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5578         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5579         qinfo->conf.offloads = txq->offloads;
5580         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5581 }
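
/*
 * Usage sketch (application side): the two callbacks above are reached
 * through the generic ethdev queue info API.  Error handling is omitted and
 * the function name is hypothetical.
 */
static void __rte_unused
ixgbe_example_dump_queue_info(uint16_t port_id, uint16_t queue_id)
{
        struct rte_eth_rxq_info rx_qinfo;
        struct rte_eth_txq_info tx_qinfo;

        /* These calls end up in ixgbe_rxq_info_get()/ixgbe_txq_info_get() */
        rte_eth_rx_queue_info_get(port_id, queue_id, &rx_qinfo);
        rte_eth_tx_queue_info_get(port_id, queue_id, &tx_qinfo);

        PMD_DRV_LOG(INFO, "rxq %u: %u descriptors, txq %u: %u descriptors",
                    queue_id, rx_qinfo.nb_desc, queue_id, tx_qinfo.nb_desc);
}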
5582
5583 /*
5584  * [VF] Initializes Receive Unit.
5585  */
5586 int __attribute__((cold))
5587 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5588 {
5589         struct ixgbe_hw     *hw;
5590         struct ixgbe_rx_queue *rxq;
5591         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5592         uint64_t bus_addr;
5593         uint32_t srrctl, psrtype = 0;
5594         uint16_t buf_size;
5595         uint16_t i;
5596         int ret;
5597
5598         PMD_INIT_FUNC_TRACE();
5599         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5600
5601         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5602                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5603                         "it must be a power of 2");
5604                 return -1;
5605         }
5606
5607         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5608                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5609                         "it must be less than or equal to %d",
5610                         hw->mac.max_rx_queues);
5611                 return -1;
5612         }
5613
5614         /*
5615          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5616          * disables VF packet reception if the PF MTU is > 1500.
5617          * This is done to deal with an 82599 limitation that requires
5618          * the PF and all VFs to share the same MTU.
5619          * The PF driver re-enables VF packet reception only when the
5620          * VF driver issues an IXGBE_VF_SET_LPE request.
5621          * In the meantime, the VF device cannot be used, even if the VF driver
5622          * and the guest VM network stack are ready to accept packets of a
5623          * size up to the PF MTU.
5624          * As a work-around for this PF behaviour, always call
5625          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used, so that
5626          * VF packet reception works in all cases.
5627          */
5628         ixgbevf_rlpml_set_vf(hw,
5629                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5630
5631         /*
5632          * Assume no header split and no VLAN strip support
5633          * on any Rx queue first.
5634          */
5635         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5636         /* Setup RX queues */
5637         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5638                 rxq = dev->data->rx_queues[i];
5639
5640                 /* Allocate buffers for descriptor rings */
5641                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5642                 if (ret)
5643                         return ret;
5644
5645                 /* Setup the Base and Length of the Rx Descriptor Rings */
5646                 bus_addr = rxq->rx_ring_phys_addr;
5647
5648                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5649                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5650                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5651                                 (uint32_t)(bus_addr >> 32));
5652                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5653                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5654                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5655                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5656
5657
5658                 /* Configure the SRRCTL register */
5659                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5660
5661                 /* Set if packets are dropped when no descriptors are available */
5662                 if (rxq->drop_en)
5663                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5664
5665                 /*
5666                  * Configure the RX buffer size in the BSIZEPACKET field of
5667                  * the SRRCTL register of the queue.
5668                  * The value is in 1 KB resolution. Valid values can be from
5669                  * 1 KB to 16 KB.
5670                  */
5671                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5672                         RTE_PKTMBUF_HEADROOM);
5673                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5674                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5675
5676                 /*
5677                  * VF modification to write virtual function SRRCTL register
5678                  */
5679                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5680
5681                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5682                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5683
5684                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5685                     /* Account for a double VLAN (QinQ) header when checking the buffer size */
5686                     (rxmode->max_rx_pkt_len +
5687                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5688                         if (!dev->data->scattered_rx)
5689                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5690                         dev->data->scattered_rx = 1;
5691                 }
5692
5693                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5694                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5695         }
5696
5697         /* Set the RQPL field for VF RSS according to the number of Rx queues */
5698         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5699                 IXGBE_PSRTYPE_RQPL_SHIFT;
5700         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5701
5702         ixgbe_set_rx_function(dev);
5703
5704         return 0;
5705 }
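
/*
 * Illustrative sketch only: the RQPL value written to VFPSRTYPE above is
 * derived as nb_rx_queues >> 1, i.e. 1 queue -> 0, 2 -> 1, 4 -> 2 (the
 * queue count has already been validated as a power of two).  The helper
 * name is hypothetical.
 */
static inline uint32_t __rte_unused
ixgbevf_example_psrtype_rqpl(uint16_t nb_rx_queues)
{
        return (uint32_t)(nb_rx_queues >> 1) << IXGBE_PSRTYPE_RQPL_SHIFT;
}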
5706
5707 /*
5708  * [VF] Initializes Transmit Unit.
5709  */
5710 void __attribute__((cold))
5711 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5712 {
5713         struct ixgbe_hw     *hw;
5714         struct ixgbe_tx_queue *txq;
5715         uint64_t bus_addr;
5716         uint32_t txctrl;
5717         uint16_t i;
5718
5719         PMD_INIT_FUNC_TRACE();
5720         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5721
5722         /* Setup the Base and Length of the Tx Descriptor Rings */
5723         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5724                 txq = dev->data->tx_queues[i];
5725                 bus_addr = txq->tx_ring_phys_addr;
5726                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5727                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5728                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5729                                 (uint32_t)(bus_addr >> 32));
5730                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5731                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5732                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5733                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5734                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5735
5736                 /*
5737                  * Disable the Tx head write-back relaxed ordering (RO) bit, since
5738                  * it breaks descriptor bookkeeping if completions arrive out of order.
5739                  */
5740                 txctrl = IXGBE_READ_REG(hw,
5741                                 IXGBE_VFDCA_TXCTRL(i));
5742                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5743                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5744                                 txctrl);
5745         }
5746 }
5747
5748 /*
5749  * [VF] Start Transmit and Receive Units.
5750  */
5751 void __attribute__((cold))
5752 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5753 {
5754         struct ixgbe_hw     *hw;
5755         struct ixgbe_tx_queue *txq;
5756         struct ixgbe_rx_queue *rxq;
5757         uint32_t txdctl;
5758         uint32_t rxdctl;
5759         uint16_t i;
5760         int poll_ms;
5761
5762         PMD_INIT_FUNC_TRACE();
5763         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5764
5765         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5766                 txq = dev->data->tx_queues[i];
5767                 /* Setup Transmit Threshold Registers */
5768                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5769                 txdctl |= txq->pthresh & 0x7F;
5770                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5771                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5772                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5773         }
5774
5775         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5776
5777                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5778                 txdctl |= IXGBE_TXDCTL_ENABLE;
5779                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5780
5781                 poll_ms = 10;
5782                 /* Wait until TX Enable ready */
5783                 do {
5784                         rte_delay_ms(1);
5785                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5786                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5787                 if (!poll_ms)
5788                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5789         }
5790         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5791
5792                 rxq = dev->data->rx_queues[i];
5793
5794                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5795                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5796                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5797
5798                 /* Wait until RX Enable ready */
5799                 poll_ms = 10;
5800                 do {
5801                         rte_delay_ms(1);
5802                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5803                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5804                 if (!poll_ms)
5805                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5806                 rte_wmb();
5807                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5808
5809         }
5810 }
5811
5812 int
5813 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5814                     const struct rte_flow_action_rss *in)
5815 {
5816         if (in->key_len > RTE_DIM(out->key) ||
5817             in->queue_num > RTE_DIM(out->queue))
5818                 return -EINVAL;
5819         out->conf = (struct rte_flow_action_rss){
5820                 .func = in->func,
5821                 .level = in->level,
5822                 .types = in->types,
5823                 .key_len = in->key_len,
5824                 .queue_num = in->queue_num,
5825                 .key = memcpy(out->key, in->key, in->key_len),
5826                 .queue = memcpy(out->queue, in->queue,
5827                                 sizeof(*in->queue) * in->queue_num),
5828         };
5829         return 0;
5830 }
5831
5832 int
5833 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5834                       const struct rte_flow_action_rss *with)
5835 {
5836         return (comp->func == with->func &&
5837                 comp->level == with->level &&
5838                 comp->types == with->types &&
5839                 comp->key_len == with->key_len &&
5840                 comp->queue_num == with->queue_num &&
5841                 !memcmp(comp->key, with->key, with->key_len) &&
5842                 !memcmp(comp->queue, with->queue,
5843                         sizeof(*with->queue) * with->queue_num));
5844 }
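
/*
 * Usage sketch only: how the two helpers above are typically combined when
 * handling an RSS flow rule - copy the action into driver-owned storage
 * with ixgbe_rss_conf_init(), then compare it against the currently
 * programmed configuration with ixgbe_action_rss_same().  The function
 * name is hypothetical.
 */
static int __rte_unused
ixgbe_example_rss_action_matches(struct ixgbe_rte_flow_rss_conf *stored,
                                 const struct rte_flow_action_rss *in)
{
        struct ixgbe_rte_flow_rss_conf tmp;

        if (ixgbe_rss_conf_init(&tmp, in) != 0)
                return 0; /* key or queue array does not fit */

        return ixgbe_action_rss_same(&stored->conf, &tmp.conf);
}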
5845
5846 int
5847 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5848                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5849 {
5850         struct ixgbe_hw *hw;
5851         uint32_t reta;
5852         uint16_t i;
5853         uint16_t j;
5854         uint16_t sp_reta_size;
5855         uint32_t reta_reg;
5856         struct rte_eth_rss_conf rss_conf = {
5857                 .rss_key = conf->conf.key_len ?
5858                         (void *)(uintptr_t)conf->conf.key : NULL,
5859                 .rss_key_len = conf->conf.key_len,
5860                 .rss_hf = conf->conf.types,
5861         };
5862         struct ixgbe_filter_info *filter_info =
5863                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5864
5865         PMD_INIT_FUNC_TRACE();
5866         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5867
5868         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5869
5870         if (!add) {
5871                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5872                                           &conf->conf)) {
5873                         ixgbe_rss_disable(dev);
5874                         memset(&filter_info->rss_info, 0,
5875                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5876                         return 0;
5877                 }
5878                 return -EINVAL;
5879         }
5880
5881         if (filter_info->rss_info.conf.queue_num)
5882                 return -EINVAL;
5883         /* Fill in redirection table
5884          * The byte-swap is needed because NIC registers are in
5885          * little-endian order.
5886          */
5887         reta = 0;
5888         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5889                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5890
5891                 if (j == conf->conf.queue_num)
5892                         j = 0;
5893                 reta = (reta << 8) | conf->conf.queue[j];
5894                 if ((i & 3) == 3)
5895                         IXGBE_WRITE_REG(hw, reta_reg,
5896                                         rte_bswap32(reta));
5897         }
5898
5899         /* Configure the RSS key and the RSS protocols used to compute
5900          * the RSS hash of input packets.
5901          */
5902         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5903                 ixgbe_rss_disable(dev);
5904                 return 0;
5905         }
5906         if (rss_conf.rss_key == NULL)
5907                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5908         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5909
5910         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5911                 return -EINVAL;
5912
5913         return 0;
5914 }
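
/*
 * Illustrative sketch only: how four consecutive redirection table entries
 * are packed into one 32-bit RETA register by the loop above.  The byte
 * swap matches the little-endian register layout; the helper name is
 * hypothetical.
 */
static inline uint32_t __rte_unused
ixgbe_example_pack_reta(const uint16_t q[4])
{
        uint32_t reta = 0;
        int k;

        for (k = 0; k < 4; k++)
                reta = (reta << 8) | (uint8_t)q[k];

        /* e.g. queues {0, 1, 2, 3} -> 0x00010203 -> written as 0x03020100 */
        return rte_bswap32(reta);
}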
5915
5916 /* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
5917 #if defined(RTE_ARCH_PPC_64)
5918 int
5919 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5920 {
5921         return -1;
5922 }
5923
5924 uint16_t
5925 ixgbe_recv_pkts_vec(
5926         void __rte_unused *rx_queue,
5927         struct rte_mbuf __rte_unused **rx_pkts,
5928         uint16_t __rte_unused nb_pkts)
5929 {
5930         return 0;
5931 }
5932
5933 uint16_t
5934 ixgbe_recv_scattered_pkts_vec(
5935         void __rte_unused *rx_queue,
5936         struct rte_mbuf __rte_unused **rx_pkts,
5937         uint16_t __rte_unused nb_pkts)
5938 {
5939         return 0;
5940 }
5941
5942 int
5943 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5944 {
5945         return -1;
5946 }
5947
5948 uint16_t
5949 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5950                 struct rte_mbuf __rte_unused **tx_pkts,
5951                 uint16_t __rte_unused nb_pkts)
5952 {
5953         return 0;
5954 }
5955
5956 int
5957 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5958 {
5959         return -1;
5960 }
5961
5962 void
5963 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5964 {
5965         return;
5966 }
5967 #endif