977ecf513792e055f49852dab6d70578ec503210
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit Mask to indicate what bits required for building TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |     \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
76
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 /*********************************************************************
91  *
92  *  TX functions
93  *
94  **********************************************************************/
95
96 /*
97  * Check for descriptors with their DD bit set and free mbufs.
98  * Return the total number of buffers freed.
99  */
100 static __rte_always_inline int
101 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
102 {
103         struct ixgbe_tx_entry *txep;
104         uint32_t status;
105         int i, nb_free = 0;
106         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
107
108         /* check DD bit on threshold descriptor */
109         status = txq->tx_ring[txq->tx_next_dd].wb.status;
110         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
111                 return 0;
112
113         /*
114          * first buffer to free from S/W ring is at index
115          * tx_next_dd - (tx_rs_thresh-1)
116          */
117         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
118
119         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
120                 /* free buffers one at a time */
121                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
122                 txep->mbuf = NULL;
123
124                 if (unlikely(m == NULL))
125                         continue;
126
127                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
128                     (nb_free > 0 && m->pool != free[0]->pool)) {
129                         rte_mempool_put_bulk(free[0]->pool,
130                                              (void **)free, nb_free);
131                         nb_free = 0;
132                 }
133
134                 free[nb_free++] = m;
135         }
136
137         if (nb_free > 0)
138                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
139
140         /* buffers were freed, update counters */
141         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
142         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
143         if (txq->tx_next_dd >= txq->nb_tx_desc)
144                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
145
146         return txq->tx_rs_thresh;
147 }
148
149 /* Populate 4 descriptors with data from 4 mbufs */
150 static inline void
151 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
152 {
153         uint64_t buf_dma_addr;
154         uint32_t pkt_len;
155         int i;
156
157         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
158                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
159                 pkt_len = (*pkts)->data_len;
160
161                 /* write data to descriptor */
162                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
163
164                 txdp->read.cmd_type_len =
165                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
166
167                 txdp->read.olinfo_status =
168                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
169
170                 rte_prefetch0(&(*pkts)->pool);
171         }
172 }
173
174 /* Populate 1 descriptor with data from 1 mbuf */
175 static inline void
176 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
177 {
178         uint64_t buf_dma_addr;
179         uint32_t pkt_len;
180
181         buf_dma_addr = rte_mbuf_data_iova(*pkts);
182         pkt_len = (*pkts)->data_len;
183
184         /* write data to descriptor */
185         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
186         txdp->read.cmd_type_len =
187                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
188         txdp->read.olinfo_status =
189                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
190         rte_prefetch0(&(*pkts)->pool);
191 }
192
193 /*
194  * Fill H/W descriptor ring with mbuf data.
195  * Copy mbuf pointers to the S/W ring.
196  */
197 static inline void
198 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
199                       uint16_t nb_pkts)
200 {
201         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
202         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
203         const int N_PER_LOOP = 4;
204         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
205         int mainpart, leftover;
206         int i, j;
207
208         /*
209          * Process most of the packets in chunks of N pkts.  Any
210          * leftover packets will get processed one at a time.
211          */
212         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
213         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
214         for (i = 0; i < mainpart; i += N_PER_LOOP) {
215                 /* Copy N mbuf pointers to the S/W ring */
216                 for (j = 0; j < N_PER_LOOP; ++j) {
217                         (txep + i + j)->mbuf = *(pkts + i + j);
218                 }
219                 tx4(txdp + i, pkts + i);
220         }
221
222         if (unlikely(leftover > 0)) {
223                 for (i = 0; i < leftover; ++i) {
224                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
225                         tx1(txdp + mainpart + i, pkts + mainpart + i);
226                 }
227         }
228 }
229
230 static inline uint16_t
231 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
232              uint16_t nb_pkts)
233 {
234         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
235         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
236         uint16_t n = 0;
237
238         /*
239          * Begin scanning the H/W ring for done descriptors when the
240          * number of available descriptors drops below tx_free_thresh.  For
241          * each done descriptor, free the associated buffer.
242          */
243         if (txq->nb_tx_free < txq->tx_free_thresh)
244                 ixgbe_tx_free_bufs(txq);
245
246         /* Only use descriptors that are available */
247         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
248         if (unlikely(nb_pkts == 0))
249                 return 0;
250
251         /* Use exactly nb_pkts descriptors */
252         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
253
254         /*
255          * At this point, we know there are enough descriptors in the
256          * ring to transmit all the packets.  This assumes that each
257          * mbuf contains a single segment, and that no new offloads
258          * are expected, which would require a new context descriptor.
259          */
260
261         /*
262          * See if we're going to wrap-around. If so, handle the top
263          * of the descriptor ring first, then do the bottom.  If not,
264          * the processing looks just like the "bottom" part anyway...
265          */
266         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
267                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
268                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
269
270                 /*
271                  * We know that the last descriptor in the ring will need to
272                  * have its RS bit set because tx_rs_thresh has to be
273                  * a divisor of the ring size
274                  */
275                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
276                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
277                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
278
279                 txq->tx_tail = 0;
280         }
281
282         /* Fill H/W descriptor ring with mbuf data */
283         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
284         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
285
286         /*
287          * Determine if RS bit should be set
288          * This is what we actually want:
289          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
290          * but instead of subtracting 1 and doing >=, we can just do
291          * greater than without subtracting.
292          */
293         if (txq->tx_tail > txq->tx_next_rs) {
294                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
295                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
296                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
297                                                 txq->tx_rs_thresh);
298                 if (txq->tx_next_rs >= txq->nb_tx_desc)
299                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
300         }
301
302         /*
303          * Check for wrap-around. This would only happen if we used
304          * up to the last descriptor in the ring, no more, no less.
305          */
306         if (txq->tx_tail >= txq->nb_tx_desc)
307                 txq->tx_tail = 0;
308
309         /* update tail pointer */
310         rte_wmb();
311         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
312
313         return nb_pkts;
314 }
315
316 uint16_t
317 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
318                        uint16_t nb_pkts)
319 {
320         uint16_t nb_tx;
321
322         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
323         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
324                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
325
326         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
327         nb_tx = 0;
328         while (nb_pkts) {
329                 uint16_t ret, n;
330
331                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
332                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
333                 nb_tx = (uint16_t)(nb_tx + ret);
334                 nb_pkts = (uint16_t)(nb_pkts - ret);
335                 if (ret < n)
336                         break;
337         }
338
339         return nb_tx;
340 }
341
342 static uint16_t
343 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
344                     uint16_t nb_pkts)
345 {
346         uint16_t nb_tx = 0;
347         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
348
349         while (nb_pkts) {
350                 uint16_t ret, num;
351
352                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
353                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
354                                                  num);
355                 nb_tx += ret;
356                 nb_pkts -= ret;
357                 if (ret < num)
358                         break;
359         }
360
361         return nb_tx;
362 }
363
364 static inline void
365 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
366                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
367                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
368                 __rte_unused uint64_t *mdata)
369 {
370         uint32_t type_tucmd_mlhl;
371         uint32_t mss_l4len_idx = 0;
372         uint32_t ctx_idx;
373         uint32_t vlan_macip_lens;
374         union ixgbe_tx_offload tx_offload_mask;
375         uint32_t seqnum_seed = 0;
376
377         ctx_idx = txq->ctx_curr;
378         tx_offload_mask.data[0] = 0;
379         tx_offload_mask.data[1] = 0;
380         type_tucmd_mlhl = 0;
381
382         /* Specify which HW CTX to upload. */
383         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
384
385         if (ol_flags & PKT_TX_VLAN_PKT) {
386                 tx_offload_mask.vlan_tci |= ~0;
387         }
388
389         /* check if TCP segmentation required for this packet */
390         if (ol_flags & PKT_TX_TCP_SEG) {
391                 /* implies IP cksum in IPv4 */
392                 if (ol_flags & PKT_TX_IP_CKSUM)
393                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
394                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
395                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
396                 else
397                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
398                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
399                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
400
401                 tx_offload_mask.l2_len |= ~0;
402                 tx_offload_mask.l3_len |= ~0;
403                 tx_offload_mask.l4_len |= ~0;
404                 tx_offload_mask.tso_segsz |= ~0;
405                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
406                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
407         } else { /* no TSO, check if hardware checksum is needed */
408                 if (ol_flags & PKT_TX_IP_CKSUM) {
409                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                 }
413
414                 switch (ol_flags & PKT_TX_L4_MASK) {
415                 case PKT_TX_UDP_CKSUM:
416                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
417                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
419                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
427                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 case PKT_TX_SCTP_CKSUM:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
435                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
436                         tx_offload_mask.l2_len |= ~0;
437                         tx_offload_mask.l3_len |= ~0;
438                         break;
439                 default:
440                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
441                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
442                         break;
443                 }
444         }
445
446         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
447                 tx_offload_mask.outer_l2_len |= ~0;
448                 tx_offload_mask.outer_l3_len |= ~0;
449                 tx_offload_mask.l2_len |= ~0;
450                 seqnum_seed |= tx_offload.outer_l3_len
451                                << IXGBE_ADVTXD_OUTER_IPLEN;
452                 seqnum_seed |= tx_offload.l2_len
453                                << IXGBE_ADVTXD_TUNNEL_LEN;
454         }
455 #ifdef RTE_LIBRTE_SECURITY
456         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
457                 union ixgbe_crypto_tx_desc_md *md =
458                                 (union ixgbe_crypto_tx_desc_md *)mdata;
459                 seqnum_seed |=
460                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
461                 type_tucmd_mlhl |= md->enc ?
462                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
463                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
464                 type_tucmd_mlhl |=
465                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
466                 tx_offload_mask.sa_idx |= ~0;
467                 tx_offload_mask.sec_pad_len |= ~0;
468         }
469 #endif
470
471         txq->ctx_cache[ctx_idx].flags = ol_flags;
472         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
473                 tx_offload_mask.data[0] & tx_offload.data[0];
474         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
475                 tx_offload_mask.data[1] & tx_offload.data[1];
476         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
477
478         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
479         vlan_macip_lens = tx_offload.l3_len;
480         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
481                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
482                                     IXGBE_ADVTXD_MACLEN_SHIFT);
483         else
484                 vlan_macip_lens |= (tx_offload.l2_len <<
485                                     IXGBE_ADVTXD_MACLEN_SHIFT);
486         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
487         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
488         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
489         ctx_txd->seqnum_seed     = seqnum_seed;
490 }
491
492 /*
493  * Check which hardware context can be used. Use the existing match
494  * or create a new context descriptor.
495  */
496 static inline uint32_t
497 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
498                    union ixgbe_tx_offload tx_offload)
499 {
500         /* If match with the current used context */
501         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
502                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
503                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
504                      & tx_offload.data[0])) &&
505                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
506                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
507                      & tx_offload.data[1]))))
508                 return txq->ctx_curr;
509
510         /* What if match with the next context  */
511         txq->ctx_curr ^= 1;
512         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
513                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
514                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
515                      & tx_offload.data[0])) &&
516                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
517                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
518                      & tx_offload.data[1]))))
519                 return txq->ctx_curr;
520
521         /* Mismatch, use the previous context */
522         return IXGBE_CTX_NUM;
523 }
524
525 static inline uint32_t
526 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
527 {
528         uint32_t tmp = 0;
529
530         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
531                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
532         if (ol_flags & PKT_TX_IP_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
534         if (ol_flags & PKT_TX_TCP_SEG)
535                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
536         return tmp;
537 }
538
539 static inline uint32_t
540 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
541 {
542         uint32_t cmdtype = 0;
543
544         if (ol_flags & PKT_TX_VLAN_PKT)
545                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
548         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
549                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
550         if (ol_flags & PKT_TX_MACSEC)
551                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
552         return cmdtype;
553 }
554
555 /* Default RS bit threshold values */
556 #ifndef DEFAULT_TX_RS_THRESH
557 #define DEFAULT_TX_RS_THRESH   32
558 #endif
559 #ifndef DEFAULT_TX_FREE_THRESH
560 #define DEFAULT_TX_FREE_THRESH 32
561 #endif
562
563 /* Reset transmit descriptors after they have been used */
564 static inline int
565 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
566 {
567         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
568         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
569         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
570         uint16_t nb_tx_desc = txq->nb_tx_desc;
571         uint16_t desc_to_clean_to;
572         uint16_t nb_tx_to_clean;
573         uint32_t status;
574
575         /* Determine the last descriptor needing to be cleaned */
576         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
577         if (desc_to_clean_to >= nb_tx_desc)
578                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
579
580         /* Check to make sure the last descriptor to clean is done */
581         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
582         status = txr[desc_to_clean_to].wb.status;
583         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
584                 PMD_TX_FREE_LOG(DEBUG,
585                                 "TX descriptor %4u is not done"
586                                 "(port=%d queue=%d)",
587                                 desc_to_clean_to,
588                                 txq->port_id, txq->queue_id);
589                 /* Failed to clean any descriptors, better luck next time */
590                 return -(1);
591         }
592
593         /* Figure out how many descriptors will be cleaned */
594         if (last_desc_cleaned > desc_to_clean_to)
595                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
596                                                         desc_to_clean_to);
597         else
598                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
599                                                 last_desc_cleaned);
600
601         PMD_TX_FREE_LOG(DEBUG,
602                         "Cleaning %4u TX descriptors: %4u to %4u "
603                         "(port=%d queue=%d)",
604                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
605                         txq->port_id, txq->queue_id);
606
607         /*
608          * The last descriptor to clean is done, so that means all the
609          * descriptors from the last descriptor that was cleaned
610          * up to the last descriptor with the RS bit set
611          * are done. Only reset the threshold descriptor.
612          */
613         txr[desc_to_clean_to].wb.status = 0;
614
615         /* Update the txq to reflect the last descriptor that was cleaned */
616         txq->last_desc_cleaned = desc_to_clean_to;
617         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
618
619         /* No Error */
620         return 0;
621 }
622
623 uint16_t
624 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
625                 uint16_t nb_pkts)
626 {
627         struct ixgbe_tx_queue *txq;
628         struct ixgbe_tx_entry *sw_ring;
629         struct ixgbe_tx_entry *txe, *txn;
630         volatile union ixgbe_adv_tx_desc *txr;
631         volatile union ixgbe_adv_tx_desc *txd, *txp;
632         struct rte_mbuf     *tx_pkt;
633         struct rte_mbuf     *m_seg;
634         uint64_t buf_dma_addr;
635         uint32_t olinfo_status;
636         uint32_t cmd_type_len;
637         uint32_t pkt_len;
638         uint16_t slen;
639         uint64_t ol_flags;
640         uint16_t tx_id;
641         uint16_t tx_last;
642         uint16_t nb_tx;
643         uint16_t nb_used;
644         uint64_t tx_ol_req;
645         uint32_t ctx = 0;
646         uint32_t new_ctx;
647         union ixgbe_tx_offload tx_offload;
648 #ifdef RTE_LIBRTE_SECURITY
649         uint8_t use_ipsec;
650 #endif
651
652         tx_offload.data[0] = 0;
653         tx_offload.data[1] = 0;
654         txq = tx_queue;
655         sw_ring = txq->sw_ring;
656         txr     = txq->tx_ring;
657         tx_id   = txq->tx_tail;
658         txe = &sw_ring[tx_id];
659         txp = NULL;
660
661         /* Determine if the descriptor ring needs to be cleaned. */
662         if (txq->nb_tx_free < txq->tx_free_thresh)
663                 ixgbe_xmit_cleanup(txq);
664
665         rte_prefetch0(&txe->mbuf->pool);
666
667         /* TX loop */
668         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
669                 new_ctx = 0;
670                 tx_pkt = *tx_pkts++;
671                 pkt_len = tx_pkt->pkt_len;
672
673                 /*
674                  * Determine how many (if any) context descriptors
675                  * are needed for offload functionality.
676                  */
677                 ol_flags = tx_pkt->ol_flags;
678 #ifdef RTE_LIBRTE_SECURITY
679                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
680 #endif
681
682                 /* If hardware offload required */
683                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
684                 if (tx_ol_req) {
685                         tx_offload.l2_len = tx_pkt->l2_len;
686                         tx_offload.l3_len = tx_pkt->l3_len;
687                         tx_offload.l4_len = tx_pkt->l4_len;
688                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
689                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
690                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
691                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
692 #ifdef RTE_LIBRTE_SECURITY
693                         if (use_ipsec) {
694                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
695                                         (union ixgbe_crypto_tx_desc_md *)
696                                                         &tx_pkt->udata64;
697                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
698                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
699                         }
700 #endif
701
702                         /* If new context need be built or reuse the exist ctx. */
703                         ctx = what_advctx_update(txq, tx_ol_req,
704                                 tx_offload);
705                         /* Only allocate context descriptor if required*/
706                         new_ctx = (ctx == IXGBE_CTX_NUM);
707                         ctx = txq->ctx_curr;
708                 }
709
710                 /*
711                  * Keep track of how many descriptors are used this loop
712                  * This will always be the number of segments + the number of
713                  * Context descriptors required to transmit the packet
714                  */
715                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
716
717                 if (txp != NULL &&
718                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
719                         /* set RS on the previous packet in the burst */
720                         txp->read.cmd_type_len |=
721                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
722
723                 /*
724                  * The number of descriptors that must be allocated for a
725                  * packet is the number of segments of that packet, plus 1
726                  * Context Descriptor for the hardware offload, if any.
727                  * Determine the last TX descriptor to allocate in the TX ring
728                  * for the packet, starting from the current position (tx_id)
729                  * in the ring.
730                  */
731                 tx_last = (uint16_t) (tx_id + nb_used - 1);
732
733                 /* Circular ring */
734                 if (tx_last >= txq->nb_tx_desc)
735                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
736
737                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
738                            " tx_first=%u tx_last=%u",
739                            (unsigned) txq->port_id,
740                            (unsigned) txq->queue_id,
741                            (unsigned) pkt_len,
742                            (unsigned) tx_id,
743                            (unsigned) tx_last);
744
745                 /*
746                  * Make sure there are enough TX descriptors available to
747                  * transmit the entire packet.
748                  * nb_used better be less than or equal to txq->tx_rs_thresh
749                  */
750                 if (nb_used > txq->nb_tx_free) {
751                         PMD_TX_FREE_LOG(DEBUG,
752                                         "Not enough free TX descriptors "
753                                         "nb_used=%4u nb_free=%4u "
754                                         "(port=%d queue=%d)",
755                                         nb_used, txq->nb_tx_free,
756                                         txq->port_id, txq->queue_id);
757
758                         if (ixgbe_xmit_cleanup(txq) != 0) {
759                                 /* Could not clean any descriptors */
760                                 if (nb_tx == 0)
761                                         return 0;
762                                 goto end_of_tx;
763                         }
764
765                         /* nb_used better be <= txq->tx_rs_thresh */
766                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
767                                 PMD_TX_FREE_LOG(DEBUG,
768                                         "The number of descriptors needed to "
769                                         "transmit the packet exceeds the "
770                                         "RS bit threshold. This will impact "
771                                         "performance."
772                                         "nb_used=%4u nb_free=%4u "
773                                         "tx_rs_thresh=%4u. "
774                                         "(port=%d queue=%d)",
775                                         nb_used, txq->nb_tx_free,
776                                         txq->tx_rs_thresh,
777                                         txq->port_id, txq->queue_id);
778                                 /*
779                                  * Loop here until there are enough TX
780                                  * descriptors or until the ring cannot be
781                                  * cleaned.
782                                  */
783                                 while (nb_used > txq->nb_tx_free) {
784                                         if (ixgbe_xmit_cleanup(txq) != 0) {
785                                                 /*
786                                                  * Could not clean any
787                                                  * descriptors
788                                                  */
789                                                 if (nb_tx == 0)
790                                                         return 0;
791                                                 goto end_of_tx;
792                                         }
793                                 }
794                         }
795                 }
796
797                 /*
798                  * By now there are enough free TX descriptors to transmit
799                  * the packet.
800                  */
801
802                 /*
803                  * Set common flags of all TX Data Descriptors.
804                  *
805                  * The following bits must be set in all Data Descriptors:
806                  *   - IXGBE_ADVTXD_DTYP_DATA
807                  *   - IXGBE_ADVTXD_DCMD_DEXT
808                  *
809                  * The following bits must be set in the first Data Descriptor
810                  * and are ignored in the other ones:
811                  *   - IXGBE_ADVTXD_DCMD_IFCS
812                  *   - IXGBE_ADVTXD_MAC_1588
813                  *   - IXGBE_ADVTXD_DCMD_VLE
814                  *
815                  * The following bits must only be set in the last Data
816                  * Descriptor:
817                  *   - IXGBE_TXD_CMD_EOP
818                  *
819                  * The following bits can be set in any Data Descriptor, but
820                  * are only set in the last Data Descriptor:
821                  *   - IXGBE_TXD_CMD_RS
822                  */
823                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
824                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
825
826 #ifdef RTE_LIBRTE_IEEE1588
827                 if (ol_flags & PKT_TX_IEEE1588_TMST)
828                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
829 #endif
830
831                 olinfo_status = 0;
832                 if (tx_ol_req) {
833
834                         if (ol_flags & PKT_TX_TCP_SEG) {
835                                 /* when TSO is on, paylen in descriptor is the
836                                  * not the packet len but the tcp payload len */
837                                 pkt_len -= (tx_offload.l2_len +
838                                         tx_offload.l3_len + tx_offload.l4_len);
839                         }
840
841                         /*
842                          * Setup the TX Advanced Context Descriptor if required
843                          */
844                         if (new_ctx) {
845                                 volatile struct ixgbe_adv_tx_context_desc *
846                                     ctx_txd;
847
848                                 ctx_txd = (volatile struct
849                                     ixgbe_adv_tx_context_desc *)
850                                     &txr[tx_id];
851
852                                 txn = &sw_ring[txe->next_id];
853                                 rte_prefetch0(&txn->mbuf->pool);
854
855                                 if (txe->mbuf != NULL) {
856                                         rte_pktmbuf_free_seg(txe->mbuf);
857                                         txe->mbuf = NULL;
858                                 }
859
860                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
861                                         tx_offload, &tx_pkt->udata64);
862
863                                 txe->last_id = tx_last;
864                                 tx_id = txe->next_id;
865                                 txe = txn;
866                         }
867
868                         /*
869                          * Setup the TX Advanced Data Descriptor,
870                          * This path will go through
871                          * whatever new/reuse the context descriptor
872                          */
873                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
874                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
875                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
876                 }
877
878                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
879 #ifdef RTE_LIBRTE_SECURITY
880                 if (use_ipsec)
881                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
882 #endif
883
884                 m_seg = tx_pkt;
885                 do {
886                         txd = &txr[tx_id];
887                         txn = &sw_ring[txe->next_id];
888                         rte_prefetch0(&txn->mbuf->pool);
889
890                         if (txe->mbuf != NULL)
891                                 rte_pktmbuf_free_seg(txe->mbuf);
892                         txe->mbuf = m_seg;
893
894                         /*
895                          * Set up Transmit Data Descriptor.
896                          */
897                         slen = m_seg->data_len;
898                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
899                         txd->read.buffer_addr =
900                                 rte_cpu_to_le_64(buf_dma_addr);
901                         txd->read.cmd_type_len =
902                                 rte_cpu_to_le_32(cmd_type_len | slen);
903                         txd->read.olinfo_status =
904                                 rte_cpu_to_le_32(olinfo_status);
905                         txe->last_id = tx_last;
906                         tx_id = txe->next_id;
907                         txe = txn;
908                         m_seg = m_seg->next;
909                 } while (m_seg != NULL);
910
911                 /*
912                  * The last packet data descriptor needs End Of Packet (EOP)
913                  */
914                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
915                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
916                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917
918                 /* Set RS bit only on threshold packets' last descriptor */
919                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
920                         PMD_TX_FREE_LOG(DEBUG,
921                                         "Setting RS bit on TXD id="
922                                         "%4u (port=%d queue=%d)",
923                                         tx_last, txq->port_id, txq->queue_id);
924
925                         cmd_type_len |= IXGBE_TXD_CMD_RS;
926
927                         /* Update txq RS bit counters */
928                         txq->nb_tx_used = 0;
929                         txp = NULL;
930                 } else
931                         txp = txd;
932
933                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
934         }
935
936 end_of_tx:
937         /* set RS on last packet in the burst */
938         if (txp != NULL)
939                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
940
941         rte_wmb();
942
943         /*
944          * Set the Transmit Descriptor Tail (TDT)
945          */
946         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
947                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
948                    (unsigned) tx_id, (unsigned) nb_tx);
949         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
950         txq->tx_tail = tx_id;
951
952         return nb_tx;
953 }
954
955 /*********************************************************************
956  *
957  *  TX prep functions
958  *
959  **********************************************************************/
960 uint16_t
961 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
962 {
963         int i, ret;
964         uint64_t ol_flags;
965         struct rte_mbuf *m;
966         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
967
968         for (i = 0; i < nb_pkts; i++) {
969                 m = tx_pkts[i];
970                 ol_flags = m->ol_flags;
971
972                 /**
973                  * Check if packet meets requirements for number of segments
974                  *
975                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
976                  *       non-TSO
977                  */
978
979                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
980                         rte_errno = EINVAL;
981                         return i;
982                 }
983
984                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
985                         rte_errno = ENOTSUP;
986                         return i;
987                 }
988
989                 /* check the size of packet */
990                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
991                         rte_errno = EINVAL;
992                         return i;
993                 }
994
995 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
996                 ret = rte_validate_tx_offload(m);
997                 if (ret != 0) {
998                         rte_errno = -ret;
999                         return i;
1000                 }
1001 #endif
1002                 ret = rte_net_intel_cksum_prepare(m);
1003                 if (ret != 0) {
1004                         rte_errno = -ret;
1005                         return i;
1006                 }
1007         }
1008
1009         return i;
1010 }
1011
1012 /*********************************************************************
1013  *
1014  *  RX functions
1015  *
1016  **********************************************************************/
1017
1018 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1019 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1020 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1021 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1022 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1023 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1027 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1028 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1029 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1030 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1031 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1035 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1039 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1043 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1047 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1051
1052 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1075
1076 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1099
1100 /**
1101  * Use 2 different table for normal packet and tunnel packet
1102  * to save the space.
1103  */
1104 const uint32_t
1105         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1106         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1107         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1108                 RTE_PTYPE_L3_IPV4,
1109         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1111         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1113         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1115         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4_EXT,
1117         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1119         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1121         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1123         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV6,
1125         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1127         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1129         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1131         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6_EXT,
1133         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1135         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1137         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1138                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1139         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1141                 RTE_PTYPE_INNER_L3_IPV6,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1151         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1163         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1175         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1185                 RTE_PTYPE_L2_ETHER |
1186                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1187                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1188 };
1189
1190 const uint32_t
1191         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1192         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1193                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                 RTE_PTYPE_INNER_L2_ETHER,
1195         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1216                 RTE_PTYPE_INNER_L4_TCP,
1217         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1218                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1220                 RTE_PTYPE_INNER_L4_TCP,
1221         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1222                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1223                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1227                 RTE_PTYPE_INNER_L4_TCP,
1228         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1229                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1230                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1231                 RTE_PTYPE_INNER_L3_IPV4,
1232         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1233                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1234                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1235                 RTE_PTYPE_INNER_L4_UDP,
1236         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1237                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1238                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1239                 RTE_PTYPE_INNER_L4_UDP,
1240         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1241                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1243                 RTE_PTYPE_INNER_L4_SCTP,
1244         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1245                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1246                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1250                 RTE_PTYPE_INNER_L4_UDP,
1251         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1252                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1253                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1254                 RTE_PTYPE_INNER_L4_SCTP,
1255         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1256                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1257                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1258                 RTE_PTYPE_INNER_L3_IPV4,
1259         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1260                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1262                 RTE_PTYPE_INNER_L4_SCTP,
1263         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1266                 RTE_PTYPE_INNER_L4_SCTP,
1267         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1268                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1270                 RTE_PTYPE_INNER_L4_TCP,
1271         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1272                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1273                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1274                 RTE_PTYPE_INNER_L4_UDP,
1275
1276         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1277                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1278                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1279         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1282                 RTE_PTYPE_INNER_L3_IPV4,
1283         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1284                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1287         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1288                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                 RTE_PTYPE_INNER_L3_IPV6,
1291         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1292                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                 RTE_PTYPE_INNER_L3_IPV4,
1295         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1296                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1299         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1300                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                 RTE_PTYPE_INNER_L3_IPV4,
1303         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1304                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1307         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1308                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1309                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1310                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1311         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1312                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                 RTE_PTYPE_INNER_L3_IPV4,
1315         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1316                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1317                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1318                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1319         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1320                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1321                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1322                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1323         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1324                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1327         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1328                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1329                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1330                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1331         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1332                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1333                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1334                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1335         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1336                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1337                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1338                 RTE_PTYPE_INNER_L3_IPV4,
1339         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1340                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1341                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1342                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1343         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1344                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1345                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1346                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1347         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1348                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1349                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1350                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1351         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1352                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1353                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1354                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1355         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1356                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1357                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1358                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1359         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1360                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1361                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1362                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1363         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1364                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1365                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1366                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1367 };
1368
1369 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1370 static inline uint32_t
1371 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1372 {
1373
1374         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1375                 return RTE_PTYPE_UNKNOWN;
1376
1377         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1378
1379         /* For tunnel packet */
1380         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1381                 /* Remove the tunnel bit to save the space. */
1382                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1383                 return ptype_table_tn[pkt_info];
1384         }
1385
1386         /**
1387          * For x550, if it's not tunnel,
1388          * tunnel type bit should be set to 0.
1389          * Reuse 82599's mask.
1390          */
1391         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1392
1393         return ptype_table[pkt_info];
1394 }
1395
1396 static inline uint64_t
1397 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1398 {
1399         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1400                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1401                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1402                 PKT_RX_RSS_HASH, 0, 0, 0,
1403                 0, 0, 0,  PKT_RX_FDIR,
1404         };
1405 #ifdef RTE_LIBRTE_IEEE1588
1406         static uint64_t ip_pkt_etqf_map[8] = {
1407                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1408                 0, 0, 0, 0,
1409         };
1410
1411         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1412                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1413                                 ip_rss_types_map[pkt_info & 0XF];
1414         else
1415                 return ip_rss_types_map[pkt_info & 0XF];
1416 #else
1417         return ip_rss_types_map[pkt_info & 0XF];
1418 #endif
1419 }
1420
1421 static inline uint64_t
1422 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1423 {
1424         uint64_t pkt_flags;
1425
1426         /*
1427          * Check if VLAN present only.
1428          * Do not check whether L3/L4 rx checksum done by NIC or not,
1429          * That can be found from rte_eth_rxmode.offloads flag
1430          */
1431         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1432
1433 #ifdef RTE_LIBRTE_IEEE1588
1434         if (rx_status & IXGBE_RXD_STAT_TMST)
1435                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1436 #endif
1437         return pkt_flags;
1438 }
1439
1440 static inline uint64_t
1441 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1442 {
1443         uint64_t pkt_flags;
1444
1445         /*
1446          * Bit 31: IPE, IPv4 checksum error
1447          * Bit 30: L4I, L4I integrity error
1448          */
1449         static uint64_t error_to_pkt_flags_map[4] = {
1450                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1451                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1452                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1453                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1454         };
1455         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1456                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1457
1458         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1459             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1460                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1461         }
1462
1463 #ifdef RTE_LIBRTE_SECURITY
1464         if (rx_status & IXGBE_RXD_STAT_SECP) {
1465                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1466                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1467                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1468         }
1469 #endif
1470
1471         return pkt_flags;
1472 }
1473
1474 /*
1475  * LOOK_AHEAD defines how many desc statuses to check beyond the
1476  * current descriptor.
1477  * It must be a pound define for optimal performance.
1478  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1479  * function only works with LOOK_AHEAD=8.
1480  */
1481 #define LOOK_AHEAD 8
1482 #if (LOOK_AHEAD != 8)
1483 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1484 #endif
1485 static inline int
1486 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1487 {
1488         volatile union ixgbe_adv_rx_desc *rxdp;
1489         struct ixgbe_rx_entry *rxep;
1490         struct rte_mbuf *mb;
1491         uint16_t pkt_len;
1492         uint64_t pkt_flags;
1493         int nb_dd;
1494         uint32_t s[LOOK_AHEAD];
1495         uint32_t pkt_info[LOOK_AHEAD];
1496         int i, j, nb_rx = 0;
1497         uint32_t status;
1498         uint64_t vlan_flags = rxq->vlan_flags;
1499
1500         /* get references to current descriptor and S/W ring entry */
1501         rxdp = &rxq->rx_ring[rxq->rx_tail];
1502         rxep = &rxq->sw_ring[rxq->rx_tail];
1503
1504         status = rxdp->wb.upper.status_error;
1505         /* check to make sure there is at least 1 packet to receive */
1506         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1507                 return 0;
1508
1509         /*
1510          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1511          * reference packets that are ready to be received.
1512          */
1513         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1514              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1515                 /* Read desc statuses backwards to avoid race condition */
1516                 for (j = 0; j < LOOK_AHEAD; j++)
1517                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1518
1519                 rte_smp_rmb();
1520
1521                 /* Compute how many status bits were set */
1522                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1523                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1524                         ;
1525
1526                 for (j = 0; j < nb_dd; j++)
1527                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1528                                                        lo_dword.data);
1529
1530                 nb_rx += nb_dd;
1531
1532                 /* Translate descriptor info to mbuf format */
1533                 for (j = 0; j < nb_dd; ++j) {
1534                         mb = rxep[j].mbuf;
1535                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1536                                   rxq->crc_len;
1537                         mb->data_len = pkt_len;
1538                         mb->pkt_len = pkt_len;
1539                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1540
1541                         /* convert descriptor fields to rte mbuf flags */
1542                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1543                                 vlan_flags);
1544                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1545                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1546                                         ((uint16_t)pkt_info[j]);
1547                         mb->ol_flags = pkt_flags;
1548                         mb->packet_type =
1549                                 ixgbe_rxd_pkt_info_to_pkt_type
1550                                         (pkt_info[j], rxq->pkt_type_mask);
1551
1552                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1553                                 mb->hash.rss = rte_le_to_cpu_32(
1554                                     rxdp[j].wb.lower.hi_dword.rss);
1555                         else if (pkt_flags & PKT_RX_FDIR) {
1556                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1557                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1558                                     IXGBE_ATR_HASH_MASK;
1559                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1560                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1561                         }
1562                 }
1563
1564                 /* Move mbuf pointers from the S/W ring to the stage */
1565                 for (j = 0; j < LOOK_AHEAD; ++j) {
1566                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1567                 }
1568
1569                 /* stop if all requested packets could not be received */
1570                 if (nb_dd != LOOK_AHEAD)
1571                         break;
1572         }
1573
1574         /* clear software ring entries so we can cleanup correctly */
1575         for (i = 0; i < nb_rx; ++i) {
1576                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1577         }
1578
1579
1580         return nb_rx;
1581 }
1582
1583 static inline int
1584 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1585 {
1586         volatile union ixgbe_adv_rx_desc *rxdp;
1587         struct ixgbe_rx_entry *rxep;
1588         struct rte_mbuf *mb;
1589         uint16_t alloc_idx;
1590         __le64 dma_addr;
1591         int diag, i;
1592
1593         /* allocate buffers in bulk directly into the S/W ring */
1594         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1595         rxep = &rxq->sw_ring[alloc_idx];
1596         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1597                                     rxq->rx_free_thresh);
1598         if (unlikely(diag != 0))
1599                 return -ENOMEM;
1600
1601         rxdp = &rxq->rx_ring[alloc_idx];
1602         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1603                 /* populate the static rte mbuf fields */
1604                 mb = rxep[i].mbuf;
1605                 if (reset_mbuf) {
1606                         mb->port = rxq->port_id;
1607                 }
1608
1609                 rte_mbuf_refcnt_set(mb, 1);
1610                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1611
1612                 /* populate the descriptors */
1613                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1614                 rxdp[i].read.hdr_addr = 0;
1615                 rxdp[i].read.pkt_addr = dma_addr;
1616         }
1617
1618         /* update state of internal queue structure */
1619         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1620         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1621                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1622
1623         /* no errors */
1624         return 0;
1625 }
1626
1627 static inline uint16_t
1628 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1629                          uint16_t nb_pkts)
1630 {
1631         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1632         int i;
1633
1634         /* how many packets are ready to return? */
1635         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1636
1637         /* copy mbuf pointers to the application's packet list */
1638         for (i = 0; i < nb_pkts; ++i)
1639                 rx_pkts[i] = stage[i];
1640
1641         /* update internal queue state */
1642         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1643         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1644
1645         return nb_pkts;
1646 }
1647
1648 static inline uint16_t
1649 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1650              uint16_t nb_pkts)
1651 {
1652         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1653         uint16_t nb_rx = 0;
1654
1655         /* Any previously recv'd pkts will be returned from the Rx stage */
1656         if (rxq->rx_nb_avail)
1657                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1658
1659         /* Scan the H/W ring for packets to receive */
1660         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1661
1662         /* update internal queue state */
1663         rxq->rx_next_avail = 0;
1664         rxq->rx_nb_avail = nb_rx;
1665         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1666
1667         /* if required, allocate new buffers to replenish descriptors */
1668         if (rxq->rx_tail > rxq->rx_free_trigger) {
1669                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1670
1671                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1672                         int i, j;
1673
1674                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1675                                    "queue_id=%u", (unsigned) rxq->port_id,
1676                                    (unsigned) rxq->queue_id);
1677
1678                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1679                                 rxq->rx_free_thresh;
1680
1681                         /*
1682                          * Need to rewind any previous receives if we cannot
1683                          * allocate new buffers to replenish the old ones.
1684                          */
1685                         rxq->rx_nb_avail = 0;
1686                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1687                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1688                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1689
1690                         return 0;
1691                 }
1692
1693                 /* update tail pointer */
1694                 rte_wmb();
1695                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1696                                             cur_free_trigger);
1697         }
1698
1699         if (rxq->rx_tail >= rxq->nb_rx_desc)
1700                 rxq->rx_tail = 0;
1701
1702         /* received any packets this loop? */
1703         if (rxq->rx_nb_avail)
1704                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1705
1706         return 0;
1707 }
1708
1709 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1710 uint16_t
1711 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1712                            uint16_t nb_pkts)
1713 {
1714         uint16_t nb_rx;
1715
1716         if (unlikely(nb_pkts == 0))
1717                 return 0;
1718
1719         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1720                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1721
1722         /* request is relatively large, chunk it up */
1723         nb_rx = 0;
1724         while (nb_pkts) {
1725                 uint16_t ret, n;
1726
1727                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1728                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1729                 nb_rx = (uint16_t)(nb_rx + ret);
1730                 nb_pkts = (uint16_t)(nb_pkts - ret);
1731                 if (ret < n)
1732                         break;
1733         }
1734
1735         return nb_rx;
1736 }
1737
1738 uint16_t
1739 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1740                 uint16_t nb_pkts)
1741 {
1742         struct ixgbe_rx_queue *rxq;
1743         volatile union ixgbe_adv_rx_desc *rx_ring;
1744         volatile union ixgbe_adv_rx_desc *rxdp;
1745         struct ixgbe_rx_entry *sw_ring;
1746         struct ixgbe_rx_entry *rxe;
1747         struct rte_mbuf *rxm;
1748         struct rte_mbuf *nmb;
1749         union ixgbe_adv_rx_desc rxd;
1750         uint64_t dma_addr;
1751         uint32_t staterr;
1752         uint32_t pkt_info;
1753         uint16_t pkt_len;
1754         uint16_t rx_id;
1755         uint16_t nb_rx;
1756         uint16_t nb_hold;
1757         uint64_t pkt_flags;
1758         uint64_t vlan_flags;
1759
1760         nb_rx = 0;
1761         nb_hold = 0;
1762         rxq = rx_queue;
1763         rx_id = rxq->rx_tail;
1764         rx_ring = rxq->rx_ring;
1765         sw_ring = rxq->sw_ring;
1766         vlan_flags = rxq->vlan_flags;
1767         while (nb_rx < nb_pkts) {
1768                 /*
1769                  * The order of operations here is important as the DD status
1770                  * bit must not be read after any other descriptor fields.
1771                  * rx_ring and rxdp are pointing to volatile data so the order
1772                  * of accesses cannot be reordered by the compiler. If they were
1773                  * not volatile, they could be reordered which could lead to
1774                  * using invalid descriptor fields when read from rxd.
1775                  */
1776                 rxdp = &rx_ring[rx_id];
1777                 staterr = rxdp->wb.upper.status_error;
1778                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1779                         break;
1780                 rxd = *rxdp;
1781
1782                 /*
1783                  * End of packet.
1784                  *
1785                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1786                  * is likely to be invalid and to be dropped by the various
1787                  * validation checks performed by the network stack.
1788                  *
1789                  * Allocate a new mbuf to replenish the RX ring descriptor.
1790                  * If the allocation fails:
1791                  *    - arrange for that RX descriptor to be the first one
1792                  *      being parsed the next time the receive function is
1793                  *      invoked [on the same queue].
1794                  *
1795                  *    - Stop parsing the RX ring and return immediately.
1796                  *
1797                  * This policy do not drop the packet received in the RX
1798                  * descriptor for which the allocation of a new mbuf failed.
1799                  * Thus, it allows that packet to be later retrieved if
1800                  * mbuf have been freed in the mean time.
1801                  * As a side effect, holding RX descriptors instead of
1802                  * systematically giving them back to the NIC may lead to
1803                  * RX ring exhaustion situations.
1804                  * However, the NIC can gracefully prevent such situations
1805                  * to happen by sending specific "back-pressure" flow control
1806                  * frames to its peer(s).
1807                  */
1808                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1809                            "ext_err_stat=0x%08x pkt_len=%u",
1810                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1811                            (unsigned) rx_id, (unsigned) staterr,
1812                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1813
1814                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1815                 if (nmb == NULL) {
1816                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1817                                    "queue_id=%u", (unsigned) rxq->port_id,
1818                                    (unsigned) rxq->queue_id);
1819                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1820                         break;
1821                 }
1822
1823                 nb_hold++;
1824                 rxe = &sw_ring[rx_id];
1825                 rx_id++;
1826                 if (rx_id == rxq->nb_rx_desc)
1827                         rx_id = 0;
1828
1829                 /* Prefetch next mbuf while processing current one. */
1830                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1831
1832                 /*
1833                  * When next RX descriptor is on a cache-line boundary,
1834                  * prefetch the next 4 RX descriptors and the next 8 pointers
1835                  * to mbufs.
1836                  */
1837                 if ((rx_id & 0x3) == 0) {
1838                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1839                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1840                 }
1841
1842                 rxm = rxe->mbuf;
1843                 rxe->mbuf = nmb;
1844                 dma_addr =
1845                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1846                 rxdp->read.hdr_addr = 0;
1847                 rxdp->read.pkt_addr = dma_addr;
1848
1849                 /*
1850                  * Initialize the returned mbuf.
1851                  * 1) setup generic mbuf fields:
1852                  *    - number of segments,
1853                  *    - next segment,
1854                  *    - packet length,
1855                  *    - RX port identifier.
1856                  * 2) integrate hardware offload data, if any:
1857                  *    - RSS flag & hash,
1858                  *    - IP checksum flag,
1859                  *    - VLAN TCI, if any,
1860                  *    - error flags.
1861                  */
1862                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1863                                       rxq->crc_len);
1864                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1865                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1866                 rxm->nb_segs = 1;
1867                 rxm->next = NULL;
1868                 rxm->pkt_len = pkt_len;
1869                 rxm->data_len = pkt_len;
1870                 rxm->port = rxq->port_id;
1871
1872                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1873                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1874                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1875
1876                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1877                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1878                 pkt_flags = pkt_flags |
1879                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1880                 rxm->ol_flags = pkt_flags;
1881                 rxm->packet_type =
1882                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1883                                                        rxq->pkt_type_mask);
1884
1885                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1886                         rxm->hash.rss = rte_le_to_cpu_32(
1887                                                 rxd.wb.lower.hi_dword.rss);
1888                 else if (pkt_flags & PKT_RX_FDIR) {
1889                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1890                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1891                                         IXGBE_ATR_HASH_MASK;
1892                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1893                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1894                 }
1895                 /*
1896                  * Store the mbuf address into the next entry of the array
1897                  * of returned packets.
1898                  */
1899                 rx_pkts[nb_rx++] = rxm;
1900         }
1901         rxq->rx_tail = rx_id;
1902
1903         /*
1904          * If the number of free RX descriptors is greater than the RX free
1905          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1906          * register.
1907          * Update the RDT with the value of the last processed RX descriptor
1908          * minus 1, to guarantee that the RDT register is never equal to the
1909          * RDH register, which creates a "full" ring situtation from the
1910          * hardware point of view...
1911          */
1912         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1913         if (nb_hold > rxq->rx_free_thresh) {
1914                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1915                            "nb_hold=%u nb_rx=%u",
1916                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1917                            (unsigned) rx_id, (unsigned) nb_hold,
1918                            (unsigned) nb_rx);
1919                 rx_id = (uint16_t) ((rx_id == 0) ?
1920                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1921                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1922                 nb_hold = 0;
1923         }
1924         rxq->nb_rx_hold = nb_hold;
1925         return nb_rx;
1926 }
1927
1928 /**
1929  * Detect an RSC descriptor.
1930  */
1931 static inline uint32_t
1932 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1933 {
1934         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1935                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1936 }
1937
1938 /**
1939  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1940  *
1941  * Fill the following info in the HEAD buffer of the Rx cluster:
1942  *    - RX port identifier
1943  *    - hardware offload data, if any:
1944  *      - RSS flag & hash
1945  *      - IP checksum flag
1946  *      - VLAN TCI, if any
1947  *      - error flags
1948  * @head HEAD of the packet cluster
1949  * @desc HW descriptor to get data from
1950  * @rxq Pointer to the Rx queue
1951  */
1952 static inline void
1953 ixgbe_fill_cluster_head_buf(
1954         struct rte_mbuf *head,
1955         union ixgbe_adv_rx_desc *desc,
1956         struct ixgbe_rx_queue *rxq,
1957         uint32_t staterr)
1958 {
1959         uint32_t pkt_info;
1960         uint64_t pkt_flags;
1961
1962         head->port = rxq->port_id;
1963
1964         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1965          * set in the pkt_flags field.
1966          */
1967         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1968         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1969         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1970         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1971         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1972         head->ol_flags = pkt_flags;
1973         head->packet_type =
1974                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1975
1976         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1977                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1978         else if (pkt_flags & PKT_RX_FDIR) {
1979                 head->hash.fdir.hash =
1980                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1981                                                           & IXGBE_ATR_HASH_MASK;
1982                 head->hash.fdir.id =
1983                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1984         }
1985 }
1986
1987 /**
1988  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1989  *
1990  * @rx_queue Rx queue handle
1991  * @rx_pkts table of received packets
1992  * @nb_pkts size of rx_pkts table
1993  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1994  *
1995  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1996  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1997  *
1998  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1999  * 1) When non-EOP RSC completion arrives:
2000  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2001  *       segment's data length.
2002  *    b) Set the "next" pointer of the current segment to point to the segment
2003  *       at the NEXTP index.
2004  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2005  *       in the sw_rsc_ring.
2006  * 2) When EOP arrives we just update the cluster's total length and offload
2007  *    flags and deliver the cluster up to the upper layers. In our case - put it
2008  *    in the rx_pkts table.
2009  *
2010  * Returns the number of received packets/clusters (according to the "bulk
2011  * receive" interface).
2012  */
2013 static inline uint16_t
2014 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2015                     bool bulk_alloc)
2016 {
2017         struct ixgbe_rx_queue *rxq = rx_queue;
2018         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2019         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2020         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2021         uint16_t rx_id = rxq->rx_tail;
2022         uint16_t nb_rx = 0;
2023         uint16_t nb_hold = rxq->nb_rx_hold;
2024         uint16_t prev_id = rxq->rx_tail;
2025
2026         while (nb_rx < nb_pkts) {
2027                 bool eop;
2028                 struct ixgbe_rx_entry *rxe;
2029                 struct ixgbe_scattered_rx_entry *sc_entry;
2030                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2031                 struct ixgbe_rx_entry *next_rxe = NULL;
2032                 struct rte_mbuf *first_seg;
2033                 struct rte_mbuf *rxm;
2034                 struct rte_mbuf *nmb = NULL;
2035                 union ixgbe_adv_rx_desc rxd;
2036                 uint16_t data_len;
2037                 uint16_t next_id;
2038                 volatile union ixgbe_adv_rx_desc *rxdp;
2039                 uint32_t staterr;
2040
2041 next_desc:
2042                 /*
2043                  * The code in this whole file uses the volatile pointer to
2044                  * ensure the read ordering of the status and the rest of the
2045                  * descriptor fields (on the compiler level only!!!). This is so
2046                  * UGLY - why not to just use the compiler barrier instead? DPDK
2047                  * even has the rte_compiler_barrier() for that.
2048                  *
2049                  * But most importantly this is just wrong because this doesn't
2050                  * ensure memory ordering in a general case at all. For
2051                  * instance, DPDK is supposed to work on Power CPUs where
2052                  * compiler barrier may just not be enough!
2053                  *
2054                  * I tried to write only this function properly to have a
2055                  * starting point (as a part of an LRO/RSC series) but the
2056                  * compiler cursed at me when I tried to cast away the
2057                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2058                  * keeping it the way it is for now.
2059                  *
2060                  * The code in this file is broken in so many other places and
2061                  * will just not work on a big endian CPU anyway therefore the
2062                  * lines below will have to be revisited together with the rest
2063                  * of the ixgbe PMD.
2064                  *
2065                  * TODO:
2066                  *    - Get rid of "volatile" and let the compiler do its job.
2067                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2068                  *      memory ordering below.
2069                  */
2070                 rxdp = &rx_ring[rx_id];
2071                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2072
2073                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2074                         break;
2075
2076                 rxd = *rxdp;
2077
2078                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2079                                   "staterr=0x%x data_len=%u",
2080                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2081                            rte_le_to_cpu_16(rxd.wb.upper.length));
2082
2083                 if (!bulk_alloc) {
2084                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2085                         if (nmb == NULL) {
2086                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2087                                                   "port_id=%u queue_id=%u",
2088                                            rxq->port_id, rxq->queue_id);
2089
2090                                 rte_eth_devices[rxq->port_id].data->
2091                                                         rx_mbuf_alloc_failed++;
2092                                 break;
2093                         }
2094                 } else if (nb_hold > rxq->rx_free_thresh) {
2095                         uint16_t next_rdt = rxq->rx_free_trigger;
2096
2097                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2098                                 rte_wmb();
2099                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2100                                                             next_rdt);
2101                                 nb_hold -= rxq->rx_free_thresh;
2102                         } else {
2103                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2104                                                   "port_id=%u queue_id=%u",
2105                                            rxq->port_id, rxq->queue_id);
2106
2107                                 rte_eth_devices[rxq->port_id].data->
2108                                                         rx_mbuf_alloc_failed++;
2109                                 break;
2110                         }
2111                 }
2112
2113                 nb_hold++;
2114                 rxe = &sw_ring[rx_id];
2115                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2116
2117                 next_id = rx_id + 1;
2118                 if (next_id == rxq->nb_rx_desc)
2119                         next_id = 0;
2120
2121                 /* Prefetch next mbuf while processing current one. */
2122                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2123
2124                 /*
2125                  * When next RX descriptor is on a cache-line boundary,
2126                  * prefetch the next 4 RX descriptors and the next 4 pointers
2127                  * to mbufs.
2128                  */
2129                 if ((next_id & 0x3) == 0) {
2130                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2131                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2132                 }
2133
2134                 rxm = rxe->mbuf;
2135
2136                 if (!bulk_alloc) {
2137                         __le64 dma =
2138                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2139                         /*
2140                          * Update RX descriptor with the physical address of the
2141                          * new data buffer of the new allocated mbuf.
2142                          */
2143                         rxe->mbuf = nmb;
2144
2145                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2146                         rxdp->read.hdr_addr = 0;
2147                         rxdp->read.pkt_addr = dma;
2148                 } else
2149                         rxe->mbuf = NULL;
2150
2151                 /*
2152                  * Set data length & data buffer address of mbuf.
2153                  */
2154                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2155                 rxm->data_len = data_len;
2156
2157                 if (!eop) {
2158                         uint16_t nextp_id;
2159                         /*
2160                          * Get next descriptor index:
2161                          *  - For RSC it's in the NEXTP field.
2162                          *  - For a scattered packet - it's just a following
2163                          *    descriptor.
2164                          */
2165                         if (ixgbe_rsc_count(&rxd))
2166                                 nextp_id =
2167                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2168                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2169                         else
2170                                 nextp_id = next_id;
2171
2172                         next_sc_entry = &sw_sc_ring[nextp_id];
2173                         next_rxe = &sw_ring[nextp_id];
2174                         rte_ixgbe_prefetch(next_rxe);
2175                 }
2176
2177                 sc_entry = &sw_sc_ring[rx_id];
2178                 first_seg = sc_entry->fbuf;
2179                 sc_entry->fbuf = NULL;
2180
2181                 /*
2182                  * If this is the first buffer of the received packet,
2183                  * set the pointer to the first mbuf of the packet and
2184                  * initialize its context.
2185                  * Otherwise, update the total length and the number of segments
2186                  * of the current scattered packet, and update the pointer to
2187                  * the last mbuf of the current packet.
2188                  */
2189                 if (first_seg == NULL) {
2190                         first_seg = rxm;
2191                         first_seg->pkt_len = data_len;
2192                         first_seg->nb_segs = 1;
2193                 } else {
2194                         first_seg->pkt_len += data_len;
2195                         first_seg->nb_segs++;
2196                 }
2197
2198                 prev_id = rx_id;
2199                 rx_id = next_id;
2200
2201                 /*
2202                  * If this is not the last buffer of the received packet, update
2203                  * the pointer to the first mbuf at the NEXTP entry in the
2204                  * sw_sc_ring and continue to parse the RX ring.
2205                  */
2206                 if (!eop && next_rxe) {
2207                         rxm->next = next_rxe->mbuf;
2208                         next_sc_entry->fbuf = first_seg;
2209                         goto next_desc;
2210                 }
2211
2212                 /* Initialize the first mbuf of the returned packet */
2213                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2214
2215                 /*
2216                  * Deal with the case, when HW CRC srip is disabled.
2217                  * That can't happen when LRO is enabled, but still could
2218                  * happen for scattered RX mode.
2219                  */
2220                 first_seg->pkt_len -= rxq->crc_len;
2221                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2222                         struct rte_mbuf *lp;
2223
2224                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2225                                 ;
2226
2227                         first_seg->nb_segs--;
2228                         lp->data_len -= rxq->crc_len - rxm->data_len;
2229                         lp->next = NULL;
2230                         rte_pktmbuf_free_seg(rxm);
2231                 } else
2232                         rxm->data_len -= rxq->crc_len;
2233
2234                 /* Prefetch data of first segment, if configured to do so. */
2235                 rte_packet_prefetch((char *)first_seg->buf_addr +
2236                         first_seg->data_off);
2237
2238                 /*
2239                  * Store the mbuf address into the next entry of the array
2240                  * of returned packets.
2241                  */
2242                 rx_pkts[nb_rx++] = first_seg;
2243         }
2244
2245         /*
2246          * Record index of the next RX descriptor to probe.
2247          */
2248         rxq->rx_tail = rx_id;
2249
2250         /*
2251          * If the number of free RX descriptors is greater than the RX free
2252          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2253          * register.
2254          * Update the RDT with the value of the last processed RX descriptor
2255          * minus 1, to guarantee that the RDT register is never equal to the
2256          * RDH register, which creates a "full" ring situtation from the
2257          * hardware point of view...
2258          */
2259         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2260                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2261                            "nb_hold=%u nb_rx=%u",
2262                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2263
2264                 rte_wmb();
2265                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2266                 nb_hold = 0;
2267         }
2268
2269         rxq->nb_rx_hold = nb_hold;
2270         return nb_rx;
2271 }
2272
2273 uint16_t
2274 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2275                                  uint16_t nb_pkts)
2276 {
2277         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2278 }
2279
2280 uint16_t
2281 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2282                                uint16_t nb_pkts)
2283 {
2284         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2285 }
2286
2287 /*********************************************************************
2288  *
2289  *  Queue management functions
2290  *
2291  **********************************************************************/
2292
2293 static void __rte_cold
2294 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2295 {
2296         unsigned i;
2297
2298         if (txq->sw_ring != NULL) {
2299                 for (i = 0; i < txq->nb_tx_desc; i++) {
2300                         if (txq->sw_ring[i].mbuf != NULL) {
2301                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2302                                 txq->sw_ring[i].mbuf = NULL;
2303                         }
2304                 }
2305         }
2306 }
2307
2308 static int
2309 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2310 {
2311         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2312         uint16_t i, tx_last, tx_id;
2313         uint16_t nb_tx_free_last;
2314         uint16_t nb_tx_to_clean;
2315         uint32_t pkt_cnt;
2316
2317         /* Start free mbuf from the next of tx_tail */
2318         tx_last = txq->tx_tail;
2319         tx_id  = swr_ring[tx_last].next_id;
2320
2321         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2322                 return 0;
2323
2324         nb_tx_to_clean = txq->nb_tx_free;
2325         nb_tx_free_last = txq->nb_tx_free;
2326         if (!free_cnt)
2327                 free_cnt = txq->nb_tx_desc;
2328
2329         /* Loop through swr_ring to count the amount of
2330          * freeable mubfs and packets.
2331          */
2332         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2333                 for (i = 0; i < nb_tx_to_clean &&
2334                         pkt_cnt < free_cnt &&
2335                         tx_id != tx_last; i++) {
2336                         if (swr_ring[tx_id].mbuf != NULL) {
2337                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2338                                 swr_ring[tx_id].mbuf = NULL;
2339
2340                                 /*
2341                                  * last segment in the packet,
2342                                  * increment packet count
2343                                  */
2344                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2345                         }
2346
2347                         tx_id = swr_ring[tx_id].next_id;
2348                 }
2349
2350                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2351                         txq->nb_tx_free || tx_id == tx_last)
2352                         break;
2353
2354                 if (pkt_cnt < free_cnt) {
2355                         if (ixgbe_xmit_cleanup(txq))
2356                                 break;
2357
2358                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2359                         nb_tx_free_last = txq->nb_tx_free;
2360                 }
2361         }
2362
2363         return (int)pkt_cnt;
2364 }
2365
2366 static int
2367 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2368                         uint32_t free_cnt)
2369 {
2370         int i, n, cnt;
2371
2372         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2373                 free_cnt = txq->nb_tx_desc;
2374
2375         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2376
2377         for (i = 0; i < cnt; i += n) {
2378                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2379                         break;
2380
2381                 n = ixgbe_tx_free_bufs(txq);
2382
2383                 if (n == 0)
2384                         break;
2385         }
2386
2387         return i;
2388 }
2389
2390 static int
2391 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2392                         uint32_t free_cnt __rte_unused)
2393 {
2394         return -ENOTSUP;
2395 }
2396
2397 int
2398 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2399 {
2400         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2401         if (txq->offloads == 0 &&
2402 #ifdef RTE_LIBRTE_SECURITY
2403                         !(txq->using_ipsec) &&
2404 #endif
2405                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2406                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2407                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2408                                         txq->sw_ring_v != NULL)) {
2409                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2410                 } else {
2411                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2412                 }
2413         }
2414
2415         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2416 }
2417
2418 static void __rte_cold
2419 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2420 {
2421         if (txq != NULL &&
2422             txq->sw_ring != NULL)
2423                 rte_free(txq->sw_ring);
2424 }
2425
2426 static void __rte_cold
2427 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2428 {
2429         if (txq != NULL && txq->ops != NULL) {
2430                 txq->ops->release_mbufs(txq);
2431                 txq->ops->free_swring(txq);
2432                 rte_free(txq);
2433         }
2434 }
2435
2436 void __rte_cold
2437 ixgbe_dev_tx_queue_release(void *txq)
2438 {
2439         ixgbe_tx_queue_release(txq);
2440 }
2441
2442 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2443 static void __rte_cold
2444 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2445 {
2446         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2447         struct ixgbe_tx_entry *txe = txq->sw_ring;
2448         uint16_t prev, i;
2449
2450         /* Zero out HW ring memory */
2451         for (i = 0; i < txq->nb_tx_desc; i++) {
2452                 txq->tx_ring[i] = zeroed_desc;
2453         }
2454
2455         /* Initialize SW ring entries */
2456         prev = (uint16_t) (txq->nb_tx_desc - 1);
2457         for (i = 0; i < txq->nb_tx_desc; i++) {
2458                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2459
2460                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2461                 txe[i].mbuf = NULL;
2462                 txe[i].last_id = i;
2463                 txe[prev].next_id = i;
2464                 prev = i;
2465         }
2466
2467         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2468         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2469
2470         txq->tx_tail = 0;
2471         txq->nb_tx_used = 0;
2472         /*
2473          * Always allow 1 descriptor to be un-allocated to avoid
2474          * a H/W race condition
2475          */
2476         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2477         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2478         txq->ctx_curr = 0;
2479         memset((void *)&txq->ctx_cache, 0,
2480                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2481 }
2482
2483 static const struct ixgbe_txq_ops def_txq_ops = {
2484         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2485         .free_swring = ixgbe_tx_free_swring,
2486         .reset = ixgbe_reset_tx_queue,
2487 };
2488
2489 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2490  * the queue parameters. Used in tx_queue_setup by primary process and then
2491  * in dev_init by secondary process when attaching to an existing ethdev.
2492  */
2493 void __rte_cold
2494 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2495 {
2496         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2497         if ((txq->offloads == 0) &&
2498 #ifdef RTE_LIBRTE_SECURITY
2499                         !(txq->using_ipsec) &&
2500 #endif
2501                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2502                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2503                 dev->tx_pkt_prepare = NULL;
2504                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2505                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2506                                         ixgbe_txq_vec_setup(txq) == 0)) {
2507                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2508                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2509                 } else
2510                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2511         } else {
2512                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2513                 PMD_INIT_LOG(DEBUG,
2514                                 " - offloads = 0x%" PRIx64,
2515                                 txq->offloads);
2516                 PMD_INIT_LOG(DEBUG,
2517                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2518                                 (unsigned long)txq->tx_rs_thresh,
2519                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2520                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2521                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2522         }
2523 }
2524
2525 uint64_t
2526 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2527 {
2528         RTE_SET_USED(dev);
2529
2530         return 0;
2531 }
2532
2533 uint64_t
2534 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2535 {
2536         uint64_t tx_offload_capa;
2537         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2538
2539         tx_offload_capa =
2540                 DEV_TX_OFFLOAD_VLAN_INSERT |
2541                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2542                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2543                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2544                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2545                 DEV_TX_OFFLOAD_TCP_TSO     |
2546                 DEV_TX_OFFLOAD_MULTI_SEGS;
2547
2548         if (hw->mac.type == ixgbe_mac_82599EB ||
2549             hw->mac.type == ixgbe_mac_X540)
2550                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2551
2552         if (hw->mac.type == ixgbe_mac_X550 ||
2553             hw->mac.type == ixgbe_mac_X550EM_x ||
2554             hw->mac.type == ixgbe_mac_X550EM_a)
2555                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2556
2557 #ifdef RTE_LIBRTE_SECURITY
2558         if (dev->security_ctx)
2559                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2560 #endif
2561         return tx_offload_capa;
2562 }
2563
2564 int __rte_cold
2565 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2566                          uint16_t queue_idx,
2567                          uint16_t nb_desc,
2568                          unsigned int socket_id,
2569                          const struct rte_eth_txconf *tx_conf)
2570 {
2571         const struct rte_memzone *tz;
2572         struct ixgbe_tx_queue *txq;
2573         struct ixgbe_hw     *hw;
2574         uint16_t tx_rs_thresh, tx_free_thresh;
2575         uint64_t offloads;
2576
2577         PMD_INIT_FUNC_TRACE();
2578         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2579
2580         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2581
2582         /*
2583          * Validate number of transmit descriptors.
2584          * It must not exceed hardware maximum, and must be multiple
2585          * of IXGBE_ALIGN.
2586          */
2587         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2588                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2589                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2590                 return -EINVAL;
2591         }
2592
2593         /*
2594          * The following two parameters control the setting of the RS bit on
2595          * transmit descriptors.
2596          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2597          * descriptors have been used.
2598          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2599          * descriptors are used or if the number of descriptors required
2600          * to transmit a packet is greater than the number of free TX
2601          * descriptors.
2602          * The following constraints must be satisfied:
2603          *  tx_rs_thresh must be greater than 0.
2604          *  tx_rs_thresh must be less than the size of the ring minus 2.
2605          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2606          *  tx_rs_thresh must be a divisor of the ring size.
2607          *  tx_free_thresh must be greater than 0.
2608          *  tx_free_thresh must be less than the size of the ring minus 3.
2609          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2610          * One descriptor in the TX ring is used as a sentinel to avoid a
2611          * H/W race condition, hence the maximum threshold constraints.
2612          * When set to zero use default values.
2613          */
2614         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2615                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2616         /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */
2617         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2618                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2619         if (tx_conf->tx_rs_thresh > 0)
2620                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2621         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2622                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2623                              "exceed nb_desc. (tx_rs_thresh=%u "
2624                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2625                              (unsigned int)tx_rs_thresh,
2626                              (unsigned int)tx_free_thresh,
2627                              (unsigned int)nb_desc,
2628                              (int)dev->data->port_id,
2629                              (int)queue_idx);
2630                 return -(EINVAL);
2631         }
2632         if (tx_rs_thresh >= (nb_desc - 2)) {
2633                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2634                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2635                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2636                         (int)dev->data->port_id, (int)queue_idx);
2637                 return -(EINVAL);
2638         }
2639         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2640                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2641                         "(tx_rs_thresh=%u port=%d queue=%d)",
2642                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2643                         (int)dev->data->port_id, (int)queue_idx);
2644                 return -(EINVAL);
2645         }
2646         if (tx_free_thresh >= (nb_desc - 3)) {
2647                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2648                              "tx_free_thresh must be less than the number of "
2649                              "TX descriptors minus 3. (tx_free_thresh=%u "
2650                              "port=%d queue=%d)",
2651                              (unsigned int)tx_free_thresh,
2652                              (int)dev->data->port_id, (int)queue_idx);
2653                 return -(EINVAL);
2654         }
2655         if (tx_rs_thresh > tx_free_thresh) {
2656                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2657                              "tx_free_thresh. (tx_free_thresh=%u "
2658                              "tx_rs_thresh=%u port=%d queue=%d)",
2659                              (unsigned int)tx_free_thresh,
2660                              (unsigned int)tx_rs_thresh,
2661                              (int)dev->data->port_id,
2662                              (int)queue_idx);
2663                 return -(EINVAL);
2664         }
2665         if ((nb_desc % tx_rs_thresh) != 0) {
2666                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2667                              "number of TX descriptors. (tx_rs_thresh=%u "
2668                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2669                              (int)dev->data->port_id, (int)queue_idx);
2670                 return -(EINVAL);
2671         }
2672
2673         /*
2674          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2675          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2676          * by the NIC and all descriptors are written back after the NIC
2677          * accumulates WTHRESH descriptors.
2678          */
2679         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2680                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2681                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2682                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2683                              (int)dev->data->port_id, (int)queue_idx);
2684                 return -(EINVAL);
2685         }
2686
2687         /* Free memory prior to re-allocation if needed... */
2688         if (dev->data->tx_queues[queue_idx] != NULL) {
2689                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2690                 dev->data->tx_queues[queue_idx] = NULL;
2691         }
2692
2693         /* First allocate the tx queue data structure */
2694         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2695                                  RTE_CACHE_LINE_SIZE, socket_id);
2696         if (txq == NULL)
2697                 return -ENOMEM;
2698
2699         /*
2700          * Allocate TX ring hardware descriptors. A memzone large enough to
2701          * handle the maximum ring size is allocated in order to allow for
2702          * resizing in later calls to the queue setup function.
2703          */
2704         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2705                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2706                         IXGBE_ALIGN, socket_id);
2707         if (tz == NULL) {
2708                 ixgbe_tx_queue_release(txq);
2709                 return -ENOMEM;
2710         }
2711
2712         txq->nb_tx_desc = nb_desc;
2713         txq->tx_rs_thresh = tx_rs_thresh;
2714         txq->tx_free_thresh = tx_free_thresh;
2715         txq->pthresh = tx_conf->tx_thresh.pthresh;
2716         txq->hthresh = tx_conf->tx_thresh.hthresh;
2717         txq->wthresh = tx_conf->tx_thresh.wthresh;
2718         txq->queue_id = queue_idx;
2719         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2720                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2721         txq->port_id = dev->data->port_id;
2722         txq->offloads = offloads;
2723         txq->ops = &def_txq_ops;
2724         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2725 #ifdef RTE_LIBRTE_SECURITY
2726         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2727                         DEV_TX_OFFLOAD_SECURITY);
2728 #endif
2729
2730         /*
2731          * Modification to set VFTDT for virtual function if vf is detected
2732          */
2733         if (hw->mac.type == ixgbe_mac_82599_vf ||
2734             hw->mac.type == ixgbe_mac_X540_vf ||
2735             hw->mac.type == ixgbe_mac_X550_vf ||
2736             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2737             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2738                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2739         else
2740                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2741
2742         txq->tx_ring_phys_addr = tz->iova;
2743         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2744
2745         /* Allocate software ring */
2746         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2747                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2748                                 RTE_CACHE_LINE_SIZE, socket_id);
2749         if (txq->sw_ring == NULL) {
2750                 ixgbe_tx_queue_release(txq);
2751                 return -ENOMEM;
2752         }
2753         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2754                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2755
2756         /* set up vector or scalar TX function as appropriate */
2757         ixgbe_set_tx_function(dev, txq);
2758
2759         txq->ops->reset(txq);
2760
2761         dev->data->tx_queues[queue_idx] = txq;
2762
2763
2764         return 0;
2765 }
2766
2767 /**
2768  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2769  *
2770  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2771  * in the sw_rsc_ring is not set to NULL but rather points to the next
2772  * mbuf of this RSC aggregation (that has not been completed yet and still
2773  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2774  * will just free first "nb_segs" segments of the cluster explicitly by calling
2775  * an rte_pktmbuf_free_seg().
2776  *
2777  * @m scattered cluster head
2778  */
2779 static void __rte_cold
2780 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2781 {
2782         uint16_t i, nb_segs = m->nb_segs;
2783         struct rte_mbuf *next_seg;
2784
2785         for (i = 0; i < nb_segs; i++) {
2786                 next_seg = m->next;
2787                 rte_pktmbuf_free_seg(m);
2788                 m = next_seg;
2789         }
2790 }
2791
2792 static void __rte_cold
2793 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2794 {
2795         unsigned i;
2796
2797         /* SSE Vector driver has a different way of releasing mbufs. */
2798         if (rxq->rx_using_sse) {
2799                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2800                 return;
2801         }
2802
2803         if (rxq->sw_ring != NULL) {
2804                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2805                         if (rxq->sw_ring[i].mbuf != NULL) {
2806                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2807                                 rxq->sw_ring[i].mbuf = NULL;
2808                         }
2809                 }
2810                 if (rxq->rx_nb_avail) {
2811                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2812                                 struct rte_mbuf *mb;
2813
2814                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2815                                 rte_pktmbuf_free_seg(mb);
2816                         }
2817                         rxq->rx_nb_avail = 0;
2818                 }
2819         }
2820
2821         if (rxq->sw_sc_ring)
2822                 for (i = 0; i < rxq->nb_rx_desc; i++)
2823                         if (rxq->sw_sc_ring[i].fbuf) {
2824                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2825                                 rxq->sw_sc_ring[i].fbuf = NULL;
2826                         }
2827 }
2828
2829 static void __rte_cold
2830 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2831 {
2832         if (rxq != NULL) {
2833                 ixgbe_rx_queue_release_mbufs(rxq);
2834                 rte_free(rxq->sw_ring);
2835                 rte_free(rxq->sw_sc_ring);
2836                 rte_free(rxq);
2837         }
2838 }
2839
2840 void __rte_cold
2841 ixgbe_dev_rx_queue_release(void *rxq)
2842 {
2843         ixgbe_rx_queue_release(rxq);
2844 }
2845
2846 /*
2847  * Check if Rx Burst Bulk Alloc function can be used.
2848  * Return
2849  *        0: the preconditions are satisfied and the bulk allocation function
2850  *           can be used.
2851  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2852  *           function must be used.
2853  */
2854 static inline int __rte_cold
2855 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2856 {
2857         int ret = 0;
2858
2859         /*
2860          * Make sure the following pre-conditions are satisfied:
2861          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2862          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2863          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2864          * Scattered packets are not supported.  This should be checked
2865          * outside of this function.
2866          */
2867         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2868                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2869                              "rxq->rx_free_thresh=%d, "
2870                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2871                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2872                 ret = -EINVAL;
2873         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2874                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2875                              "rxq->rx_free_thresh=%d, "
2876                              "rxq->nb_rx_desc=%d",
2877                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2878                 ret = -EINVAL;
2879         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2880                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2881                              "rxq->nb_rx_desc=%d, "
2882                              "rxq->rx_free_thresh=%d",
2883                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2884                 ret = -EINVAL;
2885         }
2886
2887         return ret;
2888 }
2889
2890 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2891 static void __rte_cold
2892 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2893 {
2894         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2895         unsigned i;
2896         uint16_t len = rxq->nb_rx_desc;
2897
2898         /*
2899          * By default, the Rx queue setup function allocates enough memory for
2900          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2901          * extra memory at the end of the descriptor ring to be zero'd out.
2902          */
2903         if (adapter->rx_bulk_alloc_allowed)
2904                 /* zero out extra memory */
2905                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2906
2907         /*
2908          * Zero out HW ring memory. Zero out extra memory at the end of
2909          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2910          * reads extra memory as zeros.
2911          */
2912         for (i = 0; i < len; i++) {
2913                 rxq->rx_ring[i] = zeroed_desc;
2914         }
2915
2916         /*
2917          * initialize extra software ring entries. Space for these extra
2918          * entries is always allocated
2919          */
2920         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2921         for (i = rxq->nb_rx_desc; i < len; ++i) {
2922                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2923         }
2924
2925         rxq->rx_nb_avail = 0;
2926         rxq->rx_next_avail = 0;
2927         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2928         rxq->rx_tail = 0;
2929         rxq->nb_rx_hold = 0;
2930         rxq->pkt_first_seg = NULL;
2931         rxq->pkt_last_seg = NULL;
2932
2933 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2934         rxq->rxrearm_start = 0;
2935         rxq->rxrearm_nb = 0;
2936 #endif
2937 }
2938
2939 static int
2940 ixgbe_is_vf(struct rte_eth_dev *dev)
2941 {
2942         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2943
2944         switch (hw->mac.type) {
2945         case ixgbe_mac_82599_vf:
2946         case ixgbe_mac_X540_vf:
2947         case ixgbe_mac_X550_vf:
2948         case ixgbe_mac_X550EM_x_vf:
2949         case ixgbe_mac_X550EM_a_vf:
2950                 return 1;
2951         default:
2952                 return 0;
2953         }
2954 }
2955
2956 uint64_t
2957 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2958 {
2959         uint64_t offloads = 0;
2960         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2961
2962         if (hw->mac.type != ixgbe_mac_82598EB)
2963                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2964
2965         return offloads;
2966 }
2967
2968 uint64_t
2969 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2970 {
2971         uint64_t offloads;
2972         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2973
2974         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2975                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2976                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2977                    DEV_RX_OFFLOAD_KEEP_CRC    |
2978                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2979                    DEV_RX_OFFLOAD_VLAN_FILTER |
2980                    DEV_RX_OFFLOAD_SCATTER |
2981                    DEV_RX_OFFLOAD_RSS_HASH;
2982
2983         if (hw->mac.type == ixgbe_mac_82598EB)
2984                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2985
2986         if (ixgbe_is_vf(dev) == 0)
2987                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2988
2989         /*
2990          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2991          * mode.
2992          */
2993         if ((hw->mac.type == ixgbe_mac_82599EB ||
2994              hw->mac.type == ixgbe_mac_X540 ||
2995              hw->mac.type == ixgbe_mac_X550) &&
2996             !RTE_ETH_DEV_SRIOV(dev).active)
2997                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2998
2999         if (hw->mac.type == ixgbe_mac_82599EB ||
3000             hw->mac.type == ixgbe_mac_X540)
3001                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3002
3003         if (hw->mac.type == ixgbe_mac_X550 ||
3004             hw->mac.type == ixgbe_mac_X550EM_x ||
3005             hw->mac.type == ixgbe_mac_X550EM_a)
3006                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3007
3008 #ifdef RTE_LIBRTE_SECURITY
3009         if (dev->security_ctx)
3010                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3011 #endif
3012
3013         return offloads;
3014 }
3015
3016 int __rte_cold
3017 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3018                          uint16_t queue_idx,
3019                          uint16_t nb_desc,
3020                          unsigned int socket_id,
3021                          const struct rte_eth_rxconf *rx_conf,
3022                          struct rte_mempool *mp)
3023 {
3024         const struct rte_memzone *rz;
3025         struct ixgbe_rx_queue *rxq;
3026         struct ixgbe_hw     *hw;
3027         uint16_t len;
3028         struct ixgbe_adapter *adapter = dev->data->dev_private;
3029         uint64_t offloads;
3030
3031         PMD_INIT_FUNC_TRACE();
3032         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3033
3034         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3035
3036         /*
3037          * Validate number of receive descriptors.
3038          * It must not exceed hardware maximum, and must be multiple
3039          * of IXGBE_ALIGN.
3040          */
3041         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3042                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3043                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3044                 return -EINVAL;
3045         }
3046
3047         /* Free memory prior to re-allocation if needed... */
3048         if (dev->data->rx_queues[queue_idx] != NULL) {
3049                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3050                 dev->data->rx_queues[queue_idx] = NULL;
3051         }
3052
3053         /* First allocate the rx queue data structure */
3054         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3055                                  RTE_CACHE_LINE_SIZE, socket_id);
3056         if (rxq == NULL)
3057                 return -ENOMEM;
3058         rxq->mb_pool = mp;
3059         rxq->nb_rx_desc = nb_desc;
3060         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3061         rxq->queue_id = queue_idx;
3062         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3063                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3064         rxq->port_id = dev->data->port_id;
3065         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3066                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3067         else
3068                 rxq->crc_len = 0;
3069         rxq->drop_en = rx_conf->rx_drop_en;
3070         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3071         rxq->offloads = offloads;
3072
3073         /*
3074          * The packet type in RX descriptor is different for different NICs.
3075          * Some bits are used for x550 but reserved for other NICS.
3076          * So set different masks for different NICs.
3077          */
3078         if (hw->mac.type == ixgbe_mac_X550 ||
3079             hw->mac.type == ixgbe_mac_X550EM_x ||
3080             hw->mac.type == ixgbe_mac_X550EM_a ||
3081             hw->mac.type == ixgbe_mac_X550_vf ||
3082             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3083             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3084                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3085         else
3086                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3087
3088         /*
3089          * Allocate RX ring hardware descriptors. A memzone large enough to
3090          * handle the maximum ring size is allocated in order to allow for
3091          * resizing in later calls to the queue setup function.
3092          */
3093         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3094                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3095         if (rz == NULL) {
3096                 ixgbe_rx_queue_release(rxq);
3097                 return -ENOMEM;
3098         }
3099
3100         /*
3101          * Zero init all the descriptors in the ring.
3102          */
3103         memset(rz->addr, 0, RX_RING_SZ);
3104
3105         /*
3106          * Modified to setup VFRDT for Virtual Function
3107          */
3108         if (hw->mac.type == ixgbe_mac_82599_vf ||
3109             hw->mac.type == ixgbe_mac_X540_vf ||
3110             hw->mac.type == ixgbe_mac_X550_vf ||
3111             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3112             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3113                 rxq->rdt_reg_addr =
3114                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3115                 rxq->rdh_reg_addr =
3116                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3117         } else {
3118                 rxq->rdt_reg_addr =
3119                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3120                 rxq->rdh_reg_addr =
3121                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3122         }
3123
3124         rxq->rx_ring_phys_addr = rz->iova;
3125         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3126
3127         /*
3128          * Certain constraints must be met in order to use the bulk buffer
3129          * allocation Rx burst function. If any of Rx queues doesn't meet them
3130          * the feature should be disabled for the whole port.
3131          */
3132         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3133                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3134                                     "preconditions - canceling the feature for "
3135                                     "the whole port[%d]",
3136                              rxq->queue_id, rxq->port_id);
3137                 adapter->rx_bulk_alloc_allowed = false;
3138         }
3139
3140         /*
3141          * Allocate software ring. Allow for space at the end of the
3142          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3143          * function does not access an invalid memory region.
3144          */
3145         len = nb_desc;
3146         if (adapter->rx_bulk_alloc_allowed)
3147                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3148
3149         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3150                                           sizeof(struct ixgbe_rx_entry) * len,
3151                                           RTE_CACHE_LINE_SIZE, socket_id);
3152         if (!rxq->sw_ring) {
3153                 ixgbe_rx_queue_release(rxq);
3154                 return -ENOMEM;
3155         }
3156
3157         /*
3158          * Always allocate even if it's not going to be needed in order to
3159          * simplify the code.
3160          *
3161          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3162          * be requested in ixgbe_dev_rx_init(), which is called later from
3163          * dev_start() flow.
3164          */
3165         rxq->sw_sc_ring =
3166                 rte_zmalloc_socket("rxq->sw_sc_ring",
3167                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3168                                    RTE_CACHE_LINE_SIZE, socket_id);
3169         if (!rxq->sw_sc_ring) {
3170                 ixgbe_rx_queue_release(rxq);
3171                 return -ENOMEM;
3172         }
3173
3174         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3175                             "dma_addr=0x%"PRIx64,
3176                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3177                      rxq->rx_ring_phys_addr);
3178
3179         if (!rte_is_power_of_2(nb_desc)) {
3180                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3181                                     "preconditions - canceling the feature for "
3182                                     "the whole port[%d]",
3183                              rxq->queue_id, rxq->port_id);
3184                 adapter->rx_vec_allowed = false;
3185         } else
3186                 ixgbe_rxq_vec_setup(rxq);
3187
3188         dev->data->rx_queues[queue_idx] = rxq;
3189
3190         ixgbe_reset_rx_queue(adapter, rxq);
3191
3192         return 0;
3193 }
3194
3195 uint32_t
3196 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3197 {
3198 #define IXGBE_RXQ_SCAN_INTERVAL 4
3199         volatile union ixgbe_adv_rx_desc *rxdp;
3200         struct ixgbe_rx_queue *rxq;
3201         uint32_t desc = 0;
3202
3203         rxq = dev->data->rx_queues[rx_queue_id];
3204         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3205
3206         while ((desc < rxq->nb_rx_desc) &&
3207                 (rxdp->wb.upper.status_error &
3208                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3209                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3210                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3211                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3212                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3213                                 desc - rxq->nb_rx_desc]);
3214         }
3215
3216         return desc;
3217 }
3218
3219 int
3220 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3221 {
3222         volatile union ixgbe_adv_rx_desc *rxdp;
3223         struct ixgbe_rx_queue *rxq = rx_queue;
3224         uint32_t desc;
3225
3226         if (unlikely(offset >= rxq->nb_rx_desc))
3227                 return 0;
3228         desc = rxq->rx_tail + offset;
3229         if (desc >= rxq->nb_rx_desc)
3230                 desc -= rxq->nb_rx_desc;
3231
3232         rxdp = &rxq->rx_ring[desc];
3233         return !!(rxdp->wb.upper.status_error &
3234                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3235 }
3236
3237 int
3238 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3239 {
3240         struct ixgbe_rx_queue *rxq = rx_queue;
3241         volatile uint32_t *status;
3242         uint32_t nb_hold, desc;
3243
3244         if (unlikely(offset >= rxq->nb_rx_desc))
3245                 return -EINVAL;
3246
3247 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3248         if (rxq->rx_using_sse)
3249                 nb_hold = rxq->rxrearm_nb;
3250         else
3251 #endif
3252                 nb_hold = rxq->nb_rx_hold;
3253         if (offset >= rxq->nb_rx_desc - nb_hold)
3254                 return RTE_ETH_RX_DESC_UNAVAIL;
3255
3256         desc = rxq->rx_tail + offset;
3257         if (desc >= rxq->nb_rx_desc)
3258                 desc -= rxq->nb_rx_desc;
3259
3260         status = &rxq->rx_ring[desc].wb.upper.status_error;
3261         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3262                 return RTE_ETH_RX_DESC_DONE;
3263
3264         return RTE_ETH_RX_DESC_AVAIL;
3265 }
3266
3267 int
3268 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3269 {
3270         struct ixgbe_tx_queue *txq = tx_queue;
3271         volatile uint32_t *status;
3272         uint32_t desc;
3273
3274         if (unlikely(offset >= txq->nb_tx_desc))
3275                 return -EINVAL;
3276
3277         desc = txq->tx_tail + offset;
3278         /* go to next desc that has the RS bit */
3279         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3280                 txq->tx_rs_thresh;
3281         if (desc >= txq->nb_tx_desc) {
3282                 desc -= txq->nb_tx_desc;
3283                 if (desc >= txq->nb_tx_desc)
3284                         desc -= txq->nb_tx_desc;
3285         }
3286
3287         status = &txq->tx_ring[desc].wb.status;
3288         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3289                 return RTE_ETH_TX_DESC_DONE;
3290
3291         return RTE_ETH_TX_DESC_FULL;
3292 }
3293
3294 /*
3295  * Set up link loopback for X540/X550 mode Tx->Rx.
3296  */
3297 static inline void __rte_cold
3298 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3299 {
3300         uint32_t macc;
3301         PMD_INIT_FUNC_TRACE();
3302
3303         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3304
3305         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3306                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3307         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3308
3309         if (enable) {
3310                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3311                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3312                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3313                 macc |= IXGBE_MACC_FLU;
3314         } else {
3315                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3316                 macc &= ~IXGBE_MACC_FLU;
3317         }
3318
3319         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3320                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3321
3322         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3323 }
3324
3325 void __rte_cold
3326 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3327 {
3328         unsigned i;
3329         struct ixgbe_adapter *adapter = dev->data->dev_private;
3330         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3331
3332         PMD_INIT_FUNC_TRACE();
3333
3334         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3335                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3336
3337                 if (txq != NULL) {
3338                         txq->ops->release_mbufs(txq);
3339                         txq->ops->reset(txq);
3340                 }
3341         }
3342
3343         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3344                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3345
3346                 if (rxq != NULL) {
3347                         ixgbe_rx_queue_release_mbufs(rxq);
3348                         ixgbe_reset_rx_queue(adapter, rxq);
3349                 }
3350         }
3351         /* If loopback mode was enabled, reconfigure the link accordingly */
3352         if (dev->data->dev_conf.lpbk_mode != 0) {
3353                 if (hw->mac.type == ixgbe_mac_X540 ||
3354                      hw->mac.type == ixgbe_mac_X550 ||
3355                      hw->mac.type == ixgbe_mac_X550EM_x ||
3356                      hw->mac.type == ixgbe_mac_X550EM_a)
3357                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3358         }
3359 }
3360
3361 void
3362 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3363 {
3364         unsigned i;
3365
3366         PMD_INIT_FUNC_TRACE();
3367
3368         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3369                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3370                 dev->data->rx_queues[i] = NULL;
3371                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3372         }
3373         dev->data->nb_rx_queues = 0;
3374
3375         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3376                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3377                 dev->data->tx_queues[i] = NULL;
3378                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3379         }
3380         dev->data->nb_tx_queues = 0;
3381 }
3382
3383 /*********************************************************************
3384  *
3385  *  Device RX/TX init functions
3386  *
3387  **********************************************************************/
3388
3389 /**
3390  * Receive Side Scaling (RSS)
3391  * See section 7.1.2.8 in the following document:
3392  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3393  *
3394  * Principles:
3395  * The source and destination IP addresses of the IP header and the source
3396  * and destination ports of TCP/UDP headers, if any, of received packets are
3397  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3398  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3399  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3400  * RSS output index which is used as the RX queue index where to store the
3401  * received packets.
3402  * The following output is supplied in the RX write-back descriptor:
3403  *     - 32-bit result of the Microsoft RSS hash function,
3404  *     - 4-bit RSS type field.
3405  */
3406
3407 /*
3408  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3409  * Used as the default key.
3410  */
3411 static uint8_t rss_intel_key[40] = {
3412         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3413         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3414         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3415         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3416         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3417 };
3418
3419 static void
3420 ixgbe_rss_disable(struct rte_eth_dev *dev)
3421 {
3422         struct ixgbe_hw *hw;
3423         uint32_t mrqc;
3424         uint32_t mrqc_reg;
3425
3426         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3427         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3428         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3429         mrqc &= ~IXGBE_MRQC_RSSEN;
3430         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3431 }
3432
3433 static void
3434 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3435 {
3436         uint8_t  *hash_key;
3437         uint32_t mrqc;
3438         uint32_t rss_key;
3439         uint64_t rss_hf;
3440         uint16_t i;
3441         uint32_t mrqc_reg;
3442         uint32_t rssrk_reg;
3443
3444         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3445         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3446
3447         hash_key = rss_conf->rss_key;
3448         if (hash_key != NULL) {
3449                 /* Fill in RSS hash key */
3450                 for (i = 0; i < 10; i++) {
3451                         rss_key  = hash_key[(i * 4)];
3452                         rss_key |= hash_key[(i * 4) + 1] << 8;
3453                         rss_key |= hash_key[(i * 4) + 2] << 16;
3454                         rss_key |= hash_key[(i * 4) + 3] << 24;
3455                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3456                 }
3457         }
3458
3459         /* Set configured hashing protocols in MRQC register */
3460         rss_hf = rss_conf->rss_hf;
3461         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3462         if (rss_hf & ETH_RSS_IPV4)
3463                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3464         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3465                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3466         if (rss_hf & ETH_RSS_IPV6)
3467                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3468         if (rss_hf & ETH_RSS_IPV6_EX)
3469                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3470         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3471                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3472         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3473                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3474         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3475                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3476         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3477                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3478         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3479                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3480         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3481 }
3482
3483 int
3484 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3485                           struct rte_eth_rss_conf *rss_conf)
3486 {
3487         struct ixgbe_hw *hw;
3488         uint32_t mrqc;
3489         uint64_t rss_hf;
3490         uint32_t mrqc_reg;
3491
3492         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3493
3494         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3495                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3496                         "NIC.");
3497                 return -ENOTSUP;
3498         }
3499         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3500
3501         /*
3502          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3503          *     "RSS enabling cannot be done dynamically while it must be
3504          *      preceded by a software reset"
3505          * Before changing anything, first check that the update RSS operation
3506          * does not attempt to disable RSS, if RSS was enabled at
3507          * initialization time, or does not attempt to enable RSS, if RSS was
3508          * disabled at initialization time.
3509          */
3510         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3511         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3512         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3513                 if (rss_hf != 0) /* Enable RSS */
3514                         return -(EINVAL);
3515                 return 0; /* Nothing to do */
3516         }
3517         /* RSS enabled */
3518         if (rss_hf == 0) /* Disable RSS */
3519                 return -(EINVAL);
3520         ixgbe_hw_rss_hash_set(hw, rss_conf);
3521         return 0;
3522 }
3523
3524 int
3525 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3526                             struct rte_eth_rss_conf *rss_conf)
3527 {
3528         struct ixgbe_hw *hw;
3529         uint8_t *hash_key;
3530         uint32_t mrqc;
3531         uint32_t rss_key;
3532         uint64_t rss_hf;
3533         uint16_t i;
3534         uint32_t mrqc_reg;
3535         uint32_t rssrk_reg;
3536
3537         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3538         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3539         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3540         hash_key = rss_conf->rss_key;
3541         if (hash_key != NULL) {
3542                 /* Return RSS hash key */
3543                 for (i = 0; i < 10; i++) {
3544                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3545                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3546                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3547                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3548                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3549                 }
3550         }
3551
3552         /* Get RSS functions configured in MRQC register */
3553         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3554         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3555                 rss_conf->rss_hf = 0;
3556                 return 0;
3557         }
3558         rss_hf = 0;
3559         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3560                 rss_hf |= ETH_RSS_IPV4;
3561         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3562                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3563         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3564                 rss_hf |= ETH_RSS_IPV6;
3565         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3566                 rss_hf |= ETH_RSS_IPV6_EX;
3567         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3568                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3569         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3570                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3571         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3572                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3573         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3574                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3575         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3576                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3577         rss_conf->rss_hf = rss_hf;
3578         return 0;
3579 }
3580
3581 static void
3582 ixgbe_rss_configure(struct rte_eth_dev *dev)
3583 {
3584         struct rte_eth_rss_conf rss_conf;
3585         struct ixgbe_adapter *adapter;
3586         struct ixgbe_hw *hw;
3587         uint32_t reta;
3588         uint16_t i;
3589         uint16_t j;
3590         uint16_t sp_reta_size;
3591         uint32_t reta_reg;
3592
3593         PMD_INIT_FUNC_TRACE();
3594         adapter = dev->data->dev_private;
3595         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3596
3597         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3598
3599         /*
3600          * Fill in redirection table
3601          * The byte-swap is needed because NIC registers are in
3602          * little-endian order.
3603          */
3604         if (adapter->rss_reta_updated == 0) {
3605                 reta = 0;
3606                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3607                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3608
3609                         if (j == dev->data->nb_rx_queues)
3610                                 j = 0;
3611                         reta = (reta << 8) | j;
3612                         if ((i & 3) == 3)
3613                                 IXGBE_WRITE_REG(hw, reta_reg,
3614                                                 rte_bswap32(reta));
3615                 }
3616         }
3617
3618         /*
3619          * Configure the RSS key and the RSS protocols used to compute
3620          * the RSS hash of input packets.
3621          */
3622         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3623         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3624                 ixgbe_rss_disable(dev);
3625                 return;
3626         }
3627         if (rss_conf.rss_key == NULL)
3628                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3629         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3630 }
3631
3632 #define NUM_VFTA_REGISTERS 128
3633 #define NIC_RX_BUFFER_SIZE 0x200
3634 #define X550_RX_BUFFER_SIZE 0x180
3635
3636 static void
3637 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3638 {
3639         struct rte_eth_vmdq_dcb_conf *cfg;
3640         struct ixgbe_hw *hw;
3641         enum rte_eth_nb_pools num_pools;
3642         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3643         uint16_t pbsize;
3644         uint8_t nb_tcs; /* number of traffic classes */
3645         int i;
3646
3647         PMD_INIT_FUNC_TRACE();
3648         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3649         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3650         num_pools = cfg->nb_queue_pools;
3651         /* Check we have a valid number of pools */
3652         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3653                 ixgbe_rss_disable(dev);
3654                 return;
3655         }
3656         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3657         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3658
3659         /*
3660          * RXPBSIZE
3661          * split rx buffer up into sections, each for 1 traffic class
3662          */
3663         switch (hw->mac.type) {
3664         case ixgbe_mac_X550:
3665         case ixgbe_mac_X550EM_x:
3666         case ixgbe_mac_X550EM_a:
3667                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3668                 break;
3669         default:
3670                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3671                 break;
3672         }
3673         for (i = 0; i < nb_tcs; i++) {
3674                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3675
3676                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3677                 /* clear 10 bits. */
3678                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3679                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3680         }
3681         /* zero alloc all unused TCs */
3682         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3683                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3684
3685                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3686                 /* clear 10 bits. */
3687                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3688         }
3689
3690         /* MRQC: enable vmdq and dcb */
3691         mrqc = (num_pools == ETH_16_POOLS) ?
3692                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3693         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3694
3695         /* PFVTCTL: turn on virtualisation and set the default pool */
3696         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3697         if (cfg->enable_default_pool) {
3698                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3699         } else {
3700                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3701         }
3702
3703         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3704
3705         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3706         queue_mapping = 0;
3707         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3708                 /*
3709                  * mapping is done with 3 bits per priority,
3710                  * so shift by i*3 each time
3711                  */
3712                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3713
3714         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3715
3716         /* RTRPCS: DCB related */
3717         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3718
3719         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3720         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3721         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3722         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3723
3724         /* VFTA - enable all vlan filters */
3725         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3726                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3727         }
3728
3729         /* VFRE: pool enabling for receive - 16 or 32 */
3730         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3731                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3732
3733         /*
3734          * MPSAR - allow pools to read specific mac addresses
3735          * In this case, all pools should be able to read from mac addr 0
3736          */
3737         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3738         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3739
3740         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3741         for (i = 0; i < cfg->nb_pool_maps; i++) {
3742                 /* set vlan id in VF register and set the valid bit */
3743                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3744                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3745                 /*
3746                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3747                  * pools, we only need to use the first half of the register
3748                  * i.e. bits 0-31
3749                  */
3750                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3751         }
3752 }
3753
3754 /**
3755  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3756  * @dev: pointer to eth_dev structure
3757  * @dcb_config: pointer to ixgbe_dcb_config structure
3758  */
3759 static void
3760 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3761                        struct ixgbe_dcb_config *dcb_config)
3762 {
3763         uint32_t reg;
3764         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3765
3766         PMD_INIT_FUNC_TRACE();
3767         if (hw->mac.type != ixgbe_mac_82598EB) {
3768                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3769                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3770                 reg |= IXGBE_RTTDCS_ARBDIS;
3771                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3772
3773                 /* Enable DCB for Tx with 8 TCs */
3774                 if (dcb_config->num_tcs.pg_tcs == 8) {
3775                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3776                 } else {
3777                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3778                 }
3779                 if (dcb_config->vt_mode)
3780                         reg |= IXGBE_MTQC_VT_ENA;
3781                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3782
3783                 /* Enable the Tx desc arbiter */
3784                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3785                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3786                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3787
3788                 /* Enable Security TX Buffer IFG for DCB */
3789                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3790                 reg |= IXGBE_SECTX_DCB;
3791                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3792         }
3793 }
3794
3795 /**
3796  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3797  * @dev: pointer to rte_eth_dev structure
3798  * @dcb_config: pointer to ixgbe_dcb_config structure
3799  */
3800 static void
3801 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3802                         struct ixgbe_dcb_config *dcb_config)
3803 {
3804         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3805                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3806         struct ixgbe_hw *hw =
3807                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3808
3809         PMD_INIT_FUNC_TRACE();
3810         if (hw->mac.type != ixgbe_mac_82598EB)
3811                 /*PF VF Transmit Enable*/
3812                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3813                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3814
3815         /*Configure general DCB TX parameters*/
3816         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3817 }
3818
3819 static void
3820 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3821                         struct ixgbe_dcb_config *dcb_config)
3822 {
3823         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3824                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3825         struct ixgbe_dcb_tc_config *tc;
3826         uint8_t i, j;
3827
3828         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3829         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3830                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3831                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3832         } else {
3833                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3834                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3835         }
3836
3837         /* Initialize User Priority to Traffic Class mapping */
3838         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3839                 tc = &dcb_config->tc_config[j];
3840                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3841         }
3842
3843         /* User Priority to Traffic Class mapping */
3844         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3845                 j = vmdq_rx_conf->dcb_tc[i];
3846                 tc = &dcb_config->tc_config[j];
3847                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3848                                                 (uint8_t)(1 << i);
3849         }
3850 }
3851
3852 static void
3853 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3854                         struct ixgbe_dcb_config *dcb_config)
3855 {
3856         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3857                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3858         struct ixgbe_dcb_tc_config *tc;
3859         uint8_t i, j;
3860
3861         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3862         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3863                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3864                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3865         } else {
3866                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3867                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3868         }
3869
3870         /* Initialize User Priority to Traffic Class mapping */
3871         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3872                 tc = &dcb_config->tc_config[j];
3873                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3874         }
3875
3876         /* User Priority to Traffic Class mapping */
3877         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3878                 j = vmdq_tx_conf->dcb_tc[i];
3879                 tc = &dcb_config->tc_config[j];
3880                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3881                                                 (uint8_t)(1 << i);
3882         }
3883 }
3884
3885 static void
3886 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3887                 struct ixgbe_dcb_config *dcb_config)
3888 {
3889         struct rte_eth_dcb_rx_conf *rx_conf =
3890                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3891         struct ixgbe_dcb_tc_config *tc;
3892         uint8_t i, j;
3893
3894         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3895         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3896
3897         /* Initialize User Priority to Traffic Class mapping */
3898         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3899                 tc = &dcb_config->tc_config[j];
3900                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3901         }
3902
3903         /* User Priority to Traffic Class mapping */
3904         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3905                 j = rx_conf->dcb_tc[i];
3906                 tc = &dcb_config->tc_config[j];
3907                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3908                                                 (uint8_t)(1 << i);
3909         }
3910 }
3911
3912 static void
3913 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3914                 struct ixgbe_dcb_config *dcb_config)
3915 {
3916         struct rte_eth_dcb_tx_conf *tx_conf =
3917                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3918         struct ixgbe_dcb_tc_config *tc;
3919         uint8_t i, j;
3920
3921         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3922         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3923
3924         /* Initialize User Priority to Traffic Class mapping */
3925         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3926                 tc = &dcb_config->tc_config[j];
3927                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3928         }
3929
3930         /* User Priority to Traffic Class mapping */
3931         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3932                 j = tx_conf->dcb_tc[i];
3933                 tc = &dcb_config->tc_config[j];
3934                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3935                                                 (uint8_t)(1 << i);
3936         }
3937 }
3938
3939 /**
3940  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3941  * @dev: pointer to eth_dev structure
3942  * @dcb_config: pointer to ixgbe_dcb_config structure
3943  */
3944 static void
3945 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3946                        struct ixgbe_dcb_config *dcb_config)
3947 {
3948         uint32_t reg;
3949         uint32_t vlanctrl;
3950         uint8_t i;
3951         uint32_t q;
3952         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3953
3954         PMD_INIT_FUNC_TRACE();
3955         /*
3956          * Disable the arbiter before changing parameters
3957          * (always enable recycle mode; WSP)
3958          */
3959         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3960         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3961
3962         if (hw->mac.type != ixgbe_mac_82598EB) {
3963                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3964                 if (dcb_config->num_tcs.pg_tcs == 4) {
3965                         if (dcb_config->vt_mode)
3966                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3967                                         IXGBE_MRQC_VMDQRT4TCEN;
3968                         else {
3969                                 /* no matter the mode is DCB or DCB_RSS, just
3970                                  * set the MRQE to RSSXTCEN. RSS is controlled
3971                                  * by RSS_FIELD
3972                                  */
3973                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3974                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3975                                         IXGBE_MRQC_RTRSS4TCEN;
3976                         }
3977                 }
3978                 if (dcb_config->num_tcs.pg_tcs == 8) {
3979                         if (dcb_config->vt_mode)
3980                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3981                                         IXGBE_MRQC_VMDQRT8TCEN;
3982                         else {
3983                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3984                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3985                                         IXGBE_MRQC_RTRSS8TCEN;
3986                         }
3987                 }
3988
3989                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3990
3991                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3992                         /* Disable drop for all queues in VMDQ mode*/
3993                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3994                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3995                                                 (IXGBE_QDE_WRITE |
3996                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3997                 } else {
3998                         /* Enable drop for all queues in SRIOV mode */
3999                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4000                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4001                                                 (IXGBE_QDE_WRITE |
4002                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4003                                                  IXGBE_QDE_ENABLE));
4004                 }
4005         }
4006
4007         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4008         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4009         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4010         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4011
4012         /* VFTA - enable all vlan filters */
4013         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4014                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4015         }
4016
4017         /*
4018          * Configure Rx packet plane (recycle mode; WSP) and
4019          * enable arbiter
4020          */
4021         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4022         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4023 }
4024
4025 static void
4026 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4027                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4028 {
4029         switch (hw->mac.type) {
4030         case ixgbe_mac_82598EB:
4031                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4032                 break;
4033         case ixgbe_mac_82599EB:
4034         case ixgbe_mac_X540:
4035         case ixgbe_mac_X550:
4036         case ixgbe_mac_X550EM_x:
4037         case ixgbe_mac_X550EM_a:
4038                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4039                                                   tsa, map);
4040                 break;
4041         default:
4042                 break;
4043         }
4044 }
4045
4046 static void
4047 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4048                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4049 {
4050         switch (hw->mac.type) {
4051         case ixgbe_mac_82598EB:
4052                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4053                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4054                 break;
4055         case ixgbe_mac_82599EB:
4056         case ixgbe_mac_X540:
4057         case ixgbe_mac_X550:
4058         case ixgbe_mac_X550EM_x:
4059         case ixgbe_mac_X550EM_a:
4060                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4061                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4062                 break;
4063         default:
4064                 break;
4065         }
4066 }
4067
4068 #define DCB_RX_CONFIG  1
4069 #define DCB_TX_CONFIG  1
4070 #define DCB_TX_PB      1024
4071 /**
4072  * ixgbe_dcb_hw_configure - Enable DCB and configure
4073  * general DCB in VT mode and non-VT mode parameters
4074  * @dev: pointer to rte_eth_dev structure
4075  * @dcb_config: pointer to ixgbe_dcb_config structure
4076  */
4077 static int
4078 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4079                         struct ixgbe_dcb_config *dcb_config)
4080 {
4081         int     ret = 0;
4082         uint8_t i, pfc_en, nb_tcs;
4083         uint16_t pbsize, rx_buffer_size;
4084         uint8_t config_dcb_rx = 0;
4085         uint8_t config_dcb_tx = 0;
4086         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4087         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4088         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4089         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4090         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4091         struct ixgbe_dcb_tc_config *tc;
4092         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4093                 RTE_ETHER_CRC_LEN;
4094         struct ixgbe_hw *hw =
4095                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4096         struct ixgbe_bw_conf *bw_conf =
4097                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4098
4099         switch (dev->data->dev_conf.rxmode.mq_mode) {
4100         case ETH_MQ_RX_VMDQ_DCB:
4101                 dcb_config->vt_mode = true;
4102                 if (hw->mac.type != ixgbe_mac_82598EB) {
4103                         config_dcb_rx = DCB_RX_CONFIG;
4104                         /*
4105                          *get dcb and VT rx configuration parameters
4106                          *from rte_eth_conf
4107                          */
4108                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4109                         /*Configure general VMDQ and DCB RX parameters*/
4110                         ixgbe_vmdq_dcb_configure(dev);
4111                 }
4112                 break;
4113         case ETH_MQ_RX_DCB:
4114         case ETH_MQ_RX_DCB_RSS:
4115                 dcb_config->vt_mode = false;
4116                 config_dcb_rx = DCB_RX_CONFIG;
4117                 /* Get dcb TX configuration parameters from rte_eth_conf */
4118                 ixgbe_dcb_rx_config(dev, dcb_config);
4119                 /*Configure general DCB RX parameters*/
4120                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4121                 break;
4122         default:
4123                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4124                 break;
4125         }
4126         switch (dev->data->dev_conf.txmode.mq_mode) {
4127         case ETH_MQ_TX_VMDQ_DCB:
4128                 dcb_config->vt_mode = true;
4129                 config_dcb_tx = DCB_TX_CONFIG;
4130                 /* get DCB and VT TX configuration parameters
4131                  * from rte_eth_conf
4132                  */
4133                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4134                 /*Configure general VMDQ and DCB TX parameters*/
4135                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4136                 break;
4137
4138         case ETH_MQ_TX_DCB:
4139                 dcb_config->vt_mode = false;
4140                 config_dcb_tx = DCB_TX_CONFIG;
4141                 /*get DCB TX configuration parameters from rte_eth_conf*/
4142                 ixgbe_dcb_tx_config(dev, dcb_config);
4143                 /*Configure general DCB TX parameters*/
4144                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4145                 break;
4146         default:
4147                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4148                 break;
4149         }
4150
4151         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4152         /* Unpack map */
4153         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4154         if (nb_tcs == ETH_4_TCS) {
4155                 /* Avoid un-configured priority mapping to TC0 */
4156                 uint8_t j = 4;
4157                 uint8_t mask = 0xFF;
4158
4159                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4160                         mask = (uint8_t)(mask & (~(1 << map[i])));
4161                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4162                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4163                                 map[j++] = i;
4164                         mask >>= 1;
4165                 }
4166                 /* Re-configure 4 TCs BW */
4167                 for (i = 0; i < nb_tcs; i++) {
4168                         tc = &dcb_config->tc_config[i];
4169                         if (bw_conf->tc_num != nb_tcs)
4170                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4171                                         (uint8_t)(100 / nb_tcs);
4172                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4173                                                 (uint8_t)(100 / nb_tcs);
4174                 }
4175                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4176                         tc = &dcb_config->tc_config[i];
4177                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4178                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4179                 }
4180         } else {
4181                 /* Re-configure 8 TCs BW */
4182                 for (i = 0; i < nb_tcs; i++) {
4183                         tc = &dcb_config->tc_config[i];
4184                         if (bw_conf->tc_num != nb_tcs)
4185                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4186                                         (uint8_t)(100 / nb_tcs + (i & 1));
4187                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4188                                 (uint8_t)(100 / nb_tcs + (i & 1));
4189                 }
4190         }
4191
4192         switch (hw->mac.type) {
4193         case ixgbe_mac_X550:
4194         case ixgbe_mac_X550EM_x:
4195         case ixgbe_mac_X550EM_a:
4196                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4197                 break;
4198         default:
4199                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4200                 break;
4201         }
4202
4203         if (config_dcb_rx) {
4204                 /* Set RX buffer size */
4205                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4206                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4207
4208                 for (i = 0; i < nb_tcs; i++) {
4209                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4210                 }
4211                 /* zero alloc all unused TCs */
4212                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4213                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4214                 }
4215         }
4216         if (config_dcb_tx) {
4217                 /* Only support an equally distributed
4218                  *  Tx packet buffer strategy.
4219                  */
4220                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4221                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4222
4223                 for (i = 0; i < nb_tcs; i++) {
4224                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4225                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4226                 }
4227                 /* Clear unused TCs, if any, to zero buffer size*/
4228                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4229                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4230                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4231                 }
4232         }
4233
4234         /*Calculates traffic class credits*/
4235         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4236                                 IXGBE_DCB_TX_CONFIG);
4237         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4238                                 IXGBE_DCB_RX_CONFIG);
4239
4240         if (config_dcb_rx) {
4241                 /* Unpack CEE standard containers */
4242                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4243                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4244                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4245                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4246                 /* Configure PG(ETS) RX */
4247                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4248         }
4249
4250         if (config_dcb_tx) {
4251                 /* Unpack CEE standard containers */
4252                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4253                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4254                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4255                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4256                 /* Configure PG(ETS) TX */
4257                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4258         }
4259
4260         /*Configure queue statistics registers*/
4261         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4262
4263         /* Check if the PFC is supported */
4264         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4265                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4266                 for (i = 0; i < nb_tcs; i++) {
4267                         /*
4268                         * If the TC count is 8,and the default high_water is 48,
4269                         * the low_water is 16 as default.
4270                         */
4271                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4272                         hw->fc.low_water[i] = pbsize / 4;
4273                         /* Enable pfc for this TC */
4274                         tc = &dcb_config->tc_config[i];
4275                         tc->pfc = ixgbe_dcb_pfc_enabled;
4276                 }
4277                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4278                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4279                         pfc_en &= 0x0F;
4280                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4281         }
4282
4283         return ret;
4284 }
4285
4286 /**
4287  * ixgbe_configure_dcb - Configure DCB  Hardware
4288  * @dev: pointer to rte_eth_dev
4289  */
4290 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4291 {
4292         struct ixgbe_dcb_config *dcb_cfg =
4293                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4294         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4295
4296         PMD_INIT_FUNC_TRACE();
4297
4298         /* check support mq_mode for DCB */
4299         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4300             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4301             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4302                 return;
4303
4304         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4305                 return;
4306
4307         /** Configure DCB hardware **/
4308         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4309 }
4310
4311 /*
4312  * VMDq only support for 10 GbE NIC.
4313  */
4314 static void
4315 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4316 {
4317         struct rte_eth_vmdq_rx_conf *cfg;
4318         struct ixgbe_hw *hw;
4319         enum rte_eth_nb_pools num_pools;
4320         uint32_t mrqc, vt_ctl, vlanctrl;
4321         uint32_t vmolr = 0;
4322         int i;
4323
4324         PMD_INIT_FUNC_TRACE();
4325         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4326         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4327         num_pools = cfg->nb_queue_pools;
4328
4329         ixgbe_rss_disable(dev);
4330
4331         /* MRQC: enable vmdq */
4332         mrqc = IXGBE_MRQC_VMDQEN;
4333         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4334
4335         /* PFVTCTL: turn on virtualisation and set the default pool */
4336         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4337         if (cfg->enable_default_pool)
4338                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4339         else
4340                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4341
4342         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4343
4344         for (i = 0; i < (int)num_pools; i++) {
4345                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4346                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4347         }
4348
4349         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4350         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4351         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4352         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4353
4354         /* VFTA - enable all vlan filters */
4355         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4356                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4357
4358         /* VFRE: pool enabling for receive - 64 */
4359         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4360         if (num_pools == ETH_64_POOLS)
4361                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4362
4363         /*
4364          * MPSAR - allow pools to read specific mac addresses
4365          * In this case, all pools should be able to read from mac addr 0
4366          */
4367         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4368         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4369
4370         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4371         for (i = 0; i < cfg->nb_pool_maps; i++) {
4372                 /* set vlan id in VF register and set the valid bit */
4373                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4374                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4375                 /*
4376                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4377                  * pools, we only need to use the first half of the register
4378                  * i.e. bits 0-31
4379                  */
4380                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4381                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4382                                         (cfg->pool_map[i].pools & UINT32_MAX));
4383                 else
4384                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4385                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4386
4387         }
4388
4389         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4390         if (cfg->enable_loop_back) {
4391                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4392                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4393                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4394         }
4395
4396         IXGBE_WRITE_FLUSH(hw);
4397 }
4398
4399 /*
4400  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4401  * @hw: pointer to hardware structure
4402  */
4403 static void
4404 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4405 {
4406         uint32_t reg;
4407         uint32_t q;
4408
4409         PMD_INIT_FUNC_TRACE();
4410         /*PF VF Transmit Enable*/
4411         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4412         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4413
4414         /* Disable the Tx desc arbiter so that MTQC can be changed */
4415         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4416         reg |= IXGBE_RTTDCS_ARBDIS;
4417         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4418
4419         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4420         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4421
4422         /* Disable drop for all queues */
4423         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4424                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4425                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4426
4427         /* Enable the Tx desc arbiter */
4428         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4429         reg &= ~IXGBE_RTTDCS_ARBDIS;
4430         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4431
4432         IXGBE_WRITE_FLUSH(hw);
4433 }
4434
4435 static int __rte_cold
4436 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4437 {
4438         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4439         uint64_t dma_addr;
4440         unsigned int i;
4441
4442         /* Initialize software ring entries */
4443         for (i = 0; i < rxq->nb_rx_desc; i++) {
4444                 volatile union ixgbe_adv_rx_desc *rxd;
4445                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4446
4447                 if (mbuf == NULL) {
4448                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4449                                      (unsigned) rxq->queue_id);
4450                         return -ENOMEM;
4451                 }
4452
4453                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4454                 mbuf->port = rxq->port_id;
4455
4456                 dma_addr =
4457                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4458                 rxd = &rxq->rx_ring[i];
4459                 rxd->read.hdr_addr = 0;
4460                 rxd->read.pkt_addr = dma_addr;
4461                 rxe[i].mbuf = mbuf;
4462         }
4463
4464         return 0;
4465 }
4466
4467 static int
4468 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4469 {
4470         struct ixgbe_hw *hw;
4471         uint32_t mrqc;
4472
4473         ixgbe_rss_configure(dev);
4474
4475         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4476
4477         /* MRQC: enable VF RSS */
4478         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4479         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4480         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4481         case ETH_64_POOLS:
4482                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4483                 break;
4484
4485         case ETH_32_POOLS:
4486                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4487                 break;
4488
4489         default:
4490                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4491                 return -EINVAL;
4492         }
4493
4494         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4495
4496         return 0;
4497 }
4498
4499 static int
4500 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4501 {
4502         struct ixgbe_hw *hw =
4503                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4504
4505         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4506         case ETH_64_POOLS:
4507                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4508                         IXGBE_MRQC_VMDQEN);
4509                 break;
4510
4511         case ETH_32_POOLS:
4512                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4513                         IXGBE_MRQC_VMDQRT4TCEN);
4514                 break;
4515
4516         case ETH_16_POOLS:
4517                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4518                         IXGBE_MRQC_VMDQRT8TCEN);
4519                 break;
4520         default:
4521                 PMD_INIT_LOG(ERR,
4522                         "invalid pool number in IOV mode");
4523                 break;
4524         }
4525         return 0;
4526 }
4527
4528 static int
4529 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4530 {
4531         struct ixgbe_hw *hw =
4532                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4533
4534         if (hw->mac.type == ixgbe_mac_82598EB)
4535                 return 0;
4536
4537         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4538                 /*
4539                  * SRIOV inactive scheme
4540                  * any DCB/RSS w/o VMDq multi-queue setting
4541                  */
4542                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4543                 case ETH_MQ_RX_RSS:
4544                 case ETH_MQ_RX_DCB_RSS:
4545                 case ETH_MQ_RX_VMDQ_RSS:
4546                         ixgbe_rss_configure(dev);
4547                         break;
4548
4549                 case ETH_MQ_RX_VMDQ_DCB:
4550                         ixgbe_vmdq_dcb_configure(dev);
4551                         break;
4552
4553                 case ETH_MQ_RX_VMDQ_ONLY:
4554                         ixgbe_vmdq_rx_hw_configure(dev);
4555                         break;
4556
4557                 case ETH_MQ_RX_NONE:
4558                 default:
4559                         /* if mq_mode is none, disable rss mode.*/
4560                         ixgbe_rss_disable(dev);
4561                         break;
4562                 }
4563         } else {
4564                 /* SRIOV active scheme
4565                  * Support RSS together with SRIOV.
4566                  */
4567                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4568                 case ETH_MQ_RX_RSS:
4569                 case ETH_MQ_RX_VMDQ_RSS:
4570                         ixgbe_config_vf_rss(dev);
4571                         break;
4572                 case ETH_MQ_RX_VMDQ_DCB:
4573                 case ETH_MQ_RX_DCB:
4574                 /* In SRIOV, the configuration is the same as VMDq case */
4575                         ixgbe_vmdq_dcb_configure(dev);
4576                         break;
4577                 /* DCB/RSS together with SRIOV is not supported */
4578                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4579                 case ETH_MQ_RX_DCB_RSS:
4580                         PMD_INIT_LOG(ERR,
4581                                 "Could not support DCB/RSS with VMDq & SRIOV");
4582                         return -1;
4583                 default:
4584                         ixgbe_config_vf_default(dev);
4585                         break;
4586                 }
4587         }
4588
4589         return 0;
4590 }
4591
4592 static int
4593 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4594 {
4595         struct ixgbe_hw *hw =
4596                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4597         uint32_t mtqc;
4598         uint32_t rttdcs;
4599
4600         if (hw->mac.type == ixgbe_mac_82598EB)
4601                 return 0;
4602
4603         /* disable arbiter before setting MTQC */
4604         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4605         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4606         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4607
4608         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4609                 /*
4610                  * SRIOV inactive scheme
4611                  * any DCB w/o VMDq multi-queue setting
4612                  */
4613                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4614                         ixgbe_vmdq_tx_hw_configure(hw);
4615                 else {
4616                         mtqc = IXGBE_MTQC_64Q_1PB;
4617                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4618                 }
4619         } else {
4620                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4621
4622                 /*
4623                  * SRIOV active scheme
4624                  * FIXME if support DCB together with VMDq & SRIOV
4625                  */
4626                 case ETH_64_POOLS:
4627                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4628                         break;
4629                 case ETH_32_POOLS:
4630                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4631                         break;
4632                 case ETH_16_POOLS:
4633                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4634                                 IXGBE_MTQC_8TC_8TQ;
4635                         break;
4636                 default:
4637                         mtqc = IXGBE_MTQC_64Q_1PB;
4638                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4639                 }
4640                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4641         }
4642
4643         /* re-enable arbiter */
4644         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4645         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4646
4647         return 0;
4648 }
4649
4650 /**
4651  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4652  *
4653  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4654  * spec rev. 3.0 chapter 8.2.3.8.13.
4655  *
4656  * @pool Memory pool of the Rx queue
4657  */
4658 static inline uint32_t
4659 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4660 {
4661         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4662
4663         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4664         uint16_t maxdesc =
4665                 RTE_IPV4_MAX_PKT_LEN /
4666                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4667
4668         if (maxdesc >= 16)
4669                 return IXGBE_RSCCTL_MAXDESC_16;
4670         else if (maxdesc >= 8)
4671                 return IXGBE_RSCCTL_MAXDESC_8;
4672         else if (maxdesc >= 4)
4673                 return IXGBE_RSCCTL_MAXDESC_4;
4674         else
4675                 return IXGBE_RSCCTL_MAXDESC_1;
4676 }
4677
4678 /**
4679  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4680  * interrupt
4681  *
4682  * (Taken from FreeBSD tree)
4683  * (yes this is all very magic and confusing :)
4684  *
4685  * @dev port handle
4686  * @entry the register array entry
4687  * @vector the MSIX vector for this queue
4688  * @type RX/TX/MISC
4689  */
4690 static void
4691 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4692 {
4693         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4694         u32 ivar, index;
4695
4696         vector |= IXGBE_IVAR_ALLOC_VAL;
4697
4698         switch (hw->mac.type) {
4699
4700         case ixgbe_mac_82598EB:
4701                 if (type == -1)
4702                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4703                 else
4704                         entry += (type * 64);
4705                 index = (entry >> 2) & 0x1F;
4706                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4707                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4708                 ivar |= (vector << (8 * (entry & 0x3)));
4709                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4710                 break;
4711
4712         case ixgbe_mac_82599EB:
4713         case ixgbe_mac_X540:
4714                 if (type == -1) { /* MISC IVAR */
4715                         index = (entry & 1) * 8;
4716                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4717                         ivar &= ~(0xFF << index);
4718                         ivar |= (vector << index);
4719                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4720                 } else {        /* RX/TX IVARS */
4721                         index = (16 * (entry & 1)) + (8 * type);
4722                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4723                         ivar &= ~(0xFF << index);
4724                         ivar |= (vector << index);
4725                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4726                 }
4727
4728                 break;
4729
4730         default:
4731                 break;
4732         }
4733 }
4734
4735 void __rte_cold
4736 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4737 {
4738         uint16_t i, rx_using_sse;
4739         struct ixgbe_adapter *adapter = dev->data->dev_private;
4740
4741         /*
4742          * In order to allow Vector Rx there are a few configuration
4743          * conditions to be met and Rx Bulk Allocation should be allowed.
4744          */
4745         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4746             !adapter->rx_bulk_alloc_allowed) {
4747                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4748                                     "preconditions",
4749                              dev->data->port_id);
4750
4751                 adapter->rx_vec_allowed = false;
4752         }
4753
4754         /*
4755          * Initialize the appropriate LRO callback.
4756          *
4757          * If all queues satisfy the bulk allocation preconditions
4758          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4759          * Otherwise use a single allocation version.
4760          */
4761         if (dev->data->lro) {
4762                 if (adapter->rx_bulk_alloc_allowed) {
4763                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4764                                            "allocation version");
4765                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4766                 } else {
4767                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4768                                            "allocation version");
4769                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4770                 }
4771         } else if (dev->data->scattered_rx) {
4772                 /*
4773                  * Set the non-LRO scattered callback: there are Vector and
4774                  * single allocation versions.
4775                  */
4776                 if (adapter->rx_vec_allowed) {
4777                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4778                                             "callback (port=%d).",
4779                                      dev->data->port_id);
4780
4781                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4782                 } else if (adapter->rx_bulk_alloc_allowed) {
4783                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4784                                            "allocation callback (port=%d).",
4785                                      dev->data->port_id);
4786                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4787                 } else {
4788                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4789                                             "single allocation) "
4790                                             "Scattered Rx callback "
4791                                             "(port=%d).",
4792                                      dev->data->port_id);
4793
4794                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4795                 }
4796         /*
4797          * Below we set "simple" callbacks according to port/queues parameters.
4798          * If parameters allow we are going to choose between the following
4799          * callbacks:
4800          *    - Vector
4801          *    - Bulk Allocation
4802          *    - Single buffer allocation (the simplest one)
4803          */
4804         } else if (adapter->rx_vec_allowed) {
4805                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4806                                     "burst size no less than %d (port=%d).",
4807                              RTE_IXGBE_DESCS_PER_LOOP,
4808                              dev->data->port_id);
4809
4810                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4811         } else if (adapter->rx_bulk_alloc_allowed) {
4812                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4813                                     "satisfied. Rx Burst Bulk Alloc function "
4814                                     "will be used on port=%d.",
4815                              dev->data->port_id);
4816
4817                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4818         } else {
4819                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4820                                     "satisfied, or Scattered Rx is requested "
4821                                     "(port=%d).",
4822                              dev->data->port_id);
4823
4824                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4825         }
4826
4827         /* Propagate information about RX function choice through all queues. */
4828
4829         rx_using_sse =
4830                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4831                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4832
4833         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4834                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4835
4836                 rxq->rx_using_sse = rx_using_sse;
4837 #ifdef RTE_LIBRTE_SECURITY
4838                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4839                                 DEV_RX_OFFLOAD_SECURITY);
4840 #endif
4841         }
4842 }
4843
4844 /**
4845  * ixgbe_set_rsc - configure RSC related port HW registers
4846  *
4847  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4848  * of 82599 Spec (x540 configuration is virtually the same).
4849  *
4850  * @dev port handle
4851  *
4852  * Returns 0 in case of success or a non-zero error code
4853  */
4854 static int
4855 ixgbe_set_rsc(struct rte_eth_dev *dev)
4856 {
4857         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4858         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4859         struct rte_eth_dev_info dev_info = { 0 };
4860         bool rsc_capable = false;
4861         uint16_t i;
4862         uint32_t rdrxctl;
4863         uint32_t rfctl;
4864
4865         /* Sanity check */
4866         dev->dev_ops->dev_infos_get(dev, &dev_info);
4867         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4868                 rsc_capable = true;
4869
4870         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4871                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4872                                    "support it");
4873                 return -EINVAL;
4874         }
4875
4876         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4877
4878         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4879              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4880                 /*
4881                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4882                  * 3.0 RSC configuration requires HW CRC stripping being
4883                  * enabled. If user requested both HW CRC stripping off
4884                  * and RSC on - return an error.
4885                  */
4886                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4887                                     "is disabled");
4888                 return -EINVAL;
4889         }
4890
4891         /* RFCTL configuration  */
4892         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4893         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4894                 /*
4895                  * Since NFS packets coalescing is not supported - clear
4896                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4897                  * enabled.
4898                  */
4899                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4900                            IXGBE_RFCTL_NFSR_DIS);
4901         else
4902                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4903         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4904
4905         /* If LRO hasn't been requested - we are done here. */
4906         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4907                 return 0;
4908
4909         /* Set RDRXCTL.RSCACKC bit */
4910         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4911         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4912         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4913
4914         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4915         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4916                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4917                 uint32_t srrctl =
4918                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4919                 uint32_t rscctl =
4920                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4921                 uint32_t psrtype =
4922                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4923                 uint32_t eitr =
4924                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4925
4926                 /*
4927                  * ixgbe PMD doesn't support header-split at the moment.
4928                  *
4929                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4930                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4931                  * should be configured even if header split is not
4932                  * enabled. We will configure it 128 bytes following the
4933                  * recommendation in the spec.
4934                  */
4935                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4936                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4937                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4938
4939                 /*
4940                  * TODO: Consider setting the Receive Descriptor Minimum
4941                  * Threshold Size for an RSC case. This is not an obviously
4942                  * beneficiary option but the one worth considering...
4943                  */
4944
4945                 rscctl |= IXGBE_RSCCTL_RSCEN;
4946                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4947                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4948
4949                 /*
4950                  * RSC: Set ITR interval corresponding to 2K ints/s.
4951                  *
4952                  * Full-sized RSC aggregations for a 10Gb/s link will
4953                  * arrive at about 20K aggregation/s rate.
4954                  *
4955                  * 2K inst/s rate will make only 10% of the
4956                  * aggregations to be closed due to the interrupt timer
4957                  * expiration for a streaming at wire-speed case.
4958                  *
4959                  * For a sparse streaming case this setting will yield
4960                  * at most 500us latency for a single RSC aggregation.
4961                  */
4962                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4963                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4964                 eitr |= IXGBE_EITR_CNT_WDIS;
4965
4966                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4967                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4968                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4969                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4970
4971                 /*
4972                  * RSC requires the mapping of the queue to the
4973                  * interrupt vector.
4974                  */
4975                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4976         }
4977
4978         dev->data->lro = 1;
4979
4980         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4981
4982         return 0;
4983 }
4984
4985 /*
4986  * Initializes Receive Unit.
4987  */
4988 int __rte_cold
4989 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4990 {
4991         struct ixgbe_hw     *hw;
4992         struct ixgbe_rx_queue *rxq;
4993         uint64_t bus_addr;
4994         uint32_t rxctrl;
4995         uint32_t fctrl;
4996         uint32_t hlreg0;
4997         uint32_t maxfrs;
4998         uint32_t srrctl;
4999         uint32_t rdrxctl;
5000         uint32_t rxcsum;
5001         uint16_t buf_size;
5002         uint16_t i;
5003         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5004         int rc;
5005
5006         PMD_INIT_FUNC_TRACE();
5007         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5008
5009         /*
5010          * Make sure receives are disabled while setting
5011          * up the RX context (registers, descriptor rings, etc.).
5012          */
5013         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5014         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5015
5016         /* Enable receipt of broadcasted frames */
5017         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5018         fctrl |= IXGBE_FCTRL_BAM;
5019         fctrl |= IXGBE_FCTRL_DPF;
5020         fctrl |= IXGBE_FCTRL_PMCF;
5021         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5022
5023         /*
5024          * Configure CRC stripping, if any.
5025          */
5026         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5027         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5028                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5029         else
5030                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5031
5032         /*
5033          * Configure jumbo frame support, if any.
5034          */
5035         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5036                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5037                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5038                 maxfrs &= 0x0000FFFF;
5039                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5040                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5041         } else
5042                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5043
5044         /*
5045          * If loopback mode is configured, set LPBK bit.
5046          */
5047         if (dev->data->dev_conf.lpbk_mode != 0) {
5048                 rc = ixgbe_check_supported_loopback_mode(dev);
5049                 if (rc < 0) {
5050                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5051                         return rc;
5052                 }
5053                 hlreg0 |= IXGBE_HLREG0_LPBK;
5054         } else {
5055                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5056         }
5057
5058         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5059
5060         /*
5061          * Assume no header split and no VLAN strip support
5062          * on any Rx queue first .
5063          */
5064         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5065         /* Setup RX queues */
5066         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5067                 rxq = dev->data->rx_queues[i];
5068
5069                 /*
5070                  * Reset crc_len in case it was changed after queue setup by a
5071                  * call to configure.
5072                  */
5073                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5074                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5075                 else
5076                         rxq->crc_len = 0;
5077
5078                 /* Setup the Base and Length of the Rx Descriptor Rings */
5079                 bus_addr = rxq->rx_ring_phys_addr;
5080                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5081                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5082                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5083                                 (uint32_t)(bus_addr >> 32));
5084                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5085                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5086                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5087                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5088
5089                 /* Configure the SRRCTL register */
5090                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5091
5092                 /* Set if packets are dropped when no descriptors available */
5093                 if (rxq->drop_en)
5094                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5095
5096                 /*
5097                  * Configure the RX buffer size in the BSIZEPACKET field of
5098                  * the SRRCTL register of the queue.
5099                  * The value is in 1 KB resolution. Valid values can be from
5100                  * 1 KB to 16 KB.
5101                  */
5102                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5103                         RTE_PKTMBUF_HEADROOM);
5104                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5105                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5106
5107                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5108
5109                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5110                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5111
5112                 /* It adds dual VLAN length for supporting dual VLAN */
5113                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5114                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5115                         dev->data->scattered_rx = 1;
5116                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5117                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5118         }
5119
5120         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5121                 dev->data->scattered_rx = 1;
5122
5123         /*
5124          * Device configured with multiple RX queues.
5125          */
5126         ixgbe_dev_mq_rx_configure(dev);
5127
5128         /*
5129          * Setup the Checksum Register.
5130          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5131          * Enable IP/L4 checkum computation by hardware if requested to do so.
5132          */
5133         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5134         rxcsum |= IXGBE_RXCSUM_PCSD;
5135         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5136                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5137         else
5138                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5139
5140         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5141
5142         if (hw->mac.type == ixgbe_mac_82599EB ||
5143             hw->mac.type == ixgbe_mac_X540) {
5144                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5145                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5146                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5147                 else
5148                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5149                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5150                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5151         }
5152
5153         rc = ixgbe_set_rsc(dev);
5154         if (rc)
5155                 return rc;
5156
5157         ixgbe_set_rx_function(dev);
5158
5159         return 0;
5160 }
5161
5162 /*
5163  * Initializes Transmit Unit.
5164  */
5165 void __rte_cold
5166 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5167 {
5168         struct ixgbe_hw     *hw;
5169         struct ixgbe_tx_queue *txq;
5170         uint64_t bus_addr;
5171         uint32_t hlreg0;
5172         uint32_t txctrl;
5173         uint16_t i;
5174
5175         PMD_INIT_FUNC_TRACE();
5176         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5177
5178         /* Enable TX CRC (checksum offload requirement) and hw padding
5179          * (TSO requirement)
5180          */
5181         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5182         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5183         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5184
5185         /* Setup the Base and Length of the Tx Descriptor Rings */
5186         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5187                 txq = dev->data->tx_queues[i];
5188
5189                 bus_addr = txq->tx_ring_phys_addr;
5190                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5191                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5192                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5193                                 (uint32_t)(bus_addr >> 32));
5194                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5195                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5196                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5197                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5198                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5199
5200                 /*
5201                  * Disable Tx Head Writeback RO bit, since this hoses
5202                  * bookkeeping if things aren't delivered in order.
5203                  */
5204                 switch (hw->mac.type) {
5205                 case ixgbe_mac_82598EB:
5206                         txctrl = IXGBE_READ_REG(hw,
5207                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5208                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5209                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5210                                         txctrl);
5211                         break;
5212
5213                 case ixgbe_mac_82599EB:
5214                 case ixgbe_mac_X540:
5215                 case ixgbe_mac_X550:
5216                 case ixgbe_mac_X550EM_x:
5217                 case ixgbe_mac_X550EM_a:
5218                 default:
5219                         txctrl = IXGBE_READ_REG(hw,
5220                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5221                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5222                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5223                                         txctrl);
5224                         break;
5225                 }
5226         }
5227
5228         /* Device configured with multiple TX queues. */
5229         ixgbe_dev_mq_tx_configure(dev);
5230 }
5231
5232 /*
5233  * Check if requested loopback mode is supported
5234  */
5235 int
5236 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5237 {
5238         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5239
5240         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5241                 if (hw->mac.type == ixgbe_mac_82599EB ||
5242                      hw->mac.type == ixgbe_mac_X540 ||
5243                      hw->mac.type == ixgbe_mac_X550 ||
5244                      hw->mac.type == ixgbe_mac_X550EM_x ||
5245                      hw->mac.type == ixgbe_mac_X550EM_a)
5246                         return 0;
5247
5248         return -ENOTSUP;
5249 }
5250
5251 /*
5252  * Set up link for 82599 loopback mode Tx->Rx.
5253  */
5254 static inline void __rte_cold
5255 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5256 {
5257         PMD_INIT_FUNC_TRACE();
5258
5259         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5260                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5261                                 IXGBE_SUCCESS) {
5262                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5263                         /* ignore error */
5264                         return;
5265                 }
5266         }
5267
5268         /* Restart link */
5269         IXGBE_WRITE_REG(hw,
5270                         IXGBE_AUTOC,
5271                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5272         ixgbe_reset_pipeline_82599(hw);
5273
5274         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5275         msec_delay(50);
5276 }
5277
5278
5279 /*
5280  * Start Transmit and Receive Units.
5281  */
5282 int __rte_cold
5283 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5284 {
5285         struct ixgbe_hw     *hw;
5286         struct ixgbe_tx_queue *txq;
5287         struct ixgbe_rx_queue *rxq;
5288         uint32_t txdctl;
5289         uint32_t dmatxctl;
5290         uint32_t rxctrl;
5291         uint16_t i;
5292         int ret = 0;
5293
5294         PMD_INIT_FUNC_TRACE();
5295         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5296
5297         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5298                 txq = dev->data->tx_queues[i];
5299                 /* Setup Transmit Threshold Registers */
5300                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5301                 txdctl |= txq->pthresh & 0x7F;
5302                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5303                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5304                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5305         }
5306
5307         if (hw->mac.type != ixgbe_mac_82598EB) {
5308                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5309                 dmatxctl |= IXGBE_DMATXCTL_TE;
5310                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5311         }
5312
5313         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5314                 txq = dev->data->tx_queues[i];
5315                 if (!txq->tx_deferred_start) {
5316                         ret = ixgbe_dev_tx_queue_start(dev, i);
5317                         if (ret < 0)
5318                                 return ret;
5319                 }
5320         }
5321
5322         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5323                 rxq = dev->data->rx_queues[i];
5324                 if (!rxq->rx_deferred_start) {
5325                         ret = ixgbe_dev_rx_queue_start(dev, i);
5326                         if (ret < 0)
5327                                 return ret;
5328                 }
5329         }
5330
5331         /* Enable Receive engine */
5332         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5333         if (hw->mac.type == ixgbe_mac_82598EB)
5334                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5335         rxctrl |= IXGBE_RXCTRL_RXEN;
5336         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5337
5338         /* If loopback mode is enabled, set up the link accordingly */
5339         if (dev->data->dev_conf.lpbk_mode != 0) {
5340                 if (hw->mac.type == ixgbe_mac_82599EB)
5341                         ixgbe_setup_loopback_link_82599(hw);
5342                 else if (hw->mac.type == ixgbe_mac_X540 ||
5343                      hw->mac.type == ixgbe_mac_X550 ||
5344                      hw->mac.type == ixgbe_mac_X550EM_x ||
5345                      hw->mac.type == ixgbe_mac_X550EM_a)
5346                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5347         }
5348
5349 #ifdef RTE_LIBRTE_SECURITY
5350         if ((dev->data->dev_conf.rxmode.offloads &
5351                         DEV_RX_OFFLOAD_SECURITY) ||
5352                 (dev->data->dev_conf.txmode.offloads &
5353                         DEV_TX_OFFLOAD_SECURITY)) {
5354                 ret = ixgbe_crypto_enable_ipsec(dev);
5355                 if (ret != 0) {
5356                         PMD_DRV_LOG(ERR,
5357                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5358                                     ret);
5359                         return ret;
5360                 }
5361         }
5362 #endif
5363
5364         return 0;
5365 }
5366
5367 /*
5368  * Start Receive Units for specified queue.
5369  */
5370 int __rte_cold
5371 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5372 {
5373         struct ixgbe_hw     *hw;
5374         struct ixgbe_rx_queue *rxq;
5375         uint32_t rxdctl;
5376         int poll_ms;
5377
5378         PMD_INIT_FUNC_TRACE();
5379         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5380
5381         rxq = dev->data->rx_queues[rx_queue_id];
5382
5383         /* Allocate buffers for descriptor rings */
5384         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5385                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5386                              rx_queue_id);
5387                 return -1;
5388         }
5389         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5390         rxdctl |= IXGBE_RXDCTL_ENABLE;
5391         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5392
5393         /* Wait until RX Enable ready */
5394         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5395         do {
5396                 rte_delay_ms(1);
5397                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5398         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5399         if (!poll_ms)
5400                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5401         rte_wmb();
5402         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5403         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5404         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5405
5406         return 0;
5407 }
5408
5409 /*
5410  * Stop Receive Units for specified queue.
5411  */
5412 int __rte_cold
5413 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5414 {
5415         struct ixgbe_hw     *hw;
5416         struct ixgbe_adapter *adapter = dev->data->dev_private;
5417         struct ixgbe_rx_queue *rxq;
5418         uint32_t rxdctl;
5419         int poll_ms;
5420
5421         PMD_INIT_FUNC_TRACE();
5422         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5423
5424         rxq = dev->data->rx_queues[rx_queue_id];
5425
5426         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5427         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5428         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5429
5430         /* Wait until RX Enable bit clear */
5431         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5432         do {
5433                 rte_delay_ms(1);
5434                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5435         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5436         if (!poll_ms)
5437                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5438
5439         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5440
5441         ixgbe_rx_queue_release_mbufs(rxq);
5442         ixgbe_reset_rx_queue(adapter, rxq);
5443         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5444
5445         return 0;
5446 }
5447
5448
5449 /*
5450  * Start Transmit Units for specified queue.
5451  */
5452 int __rte_cold
5453 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5454 {
5455         struct ixgbe_hw     *hw;
5456         struct ixgbe_tx_queue *txq;
5457         uint32_t txdctl;
5458         int poll_ms;
5459
5460         PMD_INIT_FUNC_TRACE();
5461         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5462
5463         txq = dev->data->tx_queues[tx_queue_id];
5464         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5465         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5466         txdctl |= IXGBE_TXDCTL_ENABLE;
5467         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5468
5469         /* Wait until TX Enable ready */
5470         if (hw->mac.type == ixgbe_mac_82599EB) {
5471                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5472                 do {
5473                         rte_delay_ms(1);
5474                         txdctl = IXGBE_READ_REG(hw,
5475                                 IXGBE_TXDCTL(txq->reg_idx));
5476                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5477                 if (!poll_ms)
5478                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5479                                 tx_queue_id);
5480         }
5481         rte_wmb();
5482         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5483         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5484
5485         return 0;
5486 }
5487
5488 /*
5489  * Stop Transmit Units for specified queue.
5490  */
5491 int __rte_cold
5492 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5493 {
5494         struct ixgbe_hw     *hw;
5495         struct ixgbe_tx_queue *txq;
5496         uint32_t txdctl;
5497         uint32_t txtdh, txtdt;
5498         int poll_ms;
5499
5500         PMD_INIT_FUNC_TRACE();
5501         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5502
5503         txq = dev->data->tx_queues[tx_queue_id];
5504
5505         /* Wait until TX queue is empty */
5506         if (hw->mac.type == ixgbe_mac_82599EB) {
5507                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5508                 do {
5509                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5510                         txtdh = IXGBE_READ_REG(hw,
5511                                                IXGBE_TDH(txq->reg_idx));
5512                         txtdt = IXGBE_READ_REG(hw,
5513                                                IXGBE_TDT(txq->reg_idx));
5514                 } while (--poll_ms && (txtdh != txtdt));
5515                 if (!poll_ms)
5516                         PMD_INIT_LOG(ERR,
5517                                 "Tx Queue %d is not empty when stopping.",
5518                                 tx_queue_id);
5519         }
5520
5521         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5522         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5523         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5524
5525         /* Wait until TX Enable bit clear */
5526         if (hw->mac.type == ixgbe_mac_82599EB) {
5527                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5528                 do {
5529                         rte_delay_ms(1);
5530                         txdctl = IXGBE_READ_REG(hw,
5531                                                 IXGBE_TXDCTL(txq->reg_idx));
5532                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5533                 if (!poll_ms)
5534                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5535                                 tx_queue_id);
5536         }
5537
5538         if (txq->ops != NULL) {
5539                 txq->ops->release_mbufs(txq);
5540                 txq->ops->reset(txq);
5541         }
5542         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5543
5544         return 0;
5545 }
5546
5547 void
5548 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5549         struct rte_eth_rxq_info *qinfo)
5550 {
5551         struct ixgbe_rx_queue *rxq;
5552
5553         rxq = dev->data->rx_queues[queue_id];
5554
5555         qinfo->mp = rxq->mb_pool;
5556         qinfo->scattered_rx = dev->data->scattered_rx;
5557         qinfo->nb_desc = rxq->nb_rx_desc;
5558
5559         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5560         qinfo->conf.rx_drop_en = rxq->drop_en;
5561         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5562         qinfo->conf.offloads = rxq->offloads;
5563 }
5564
5565 void
5566 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5567         struct rte_eth_txq_info *qinfo)
5568 {
5569         struct ixgbe_tx_queue *txq;
5570
5571         txq = dev->data->tx_queues[queue_id];
5572
5573         qinfo->nb_desc = txq->nb_tx_desc;
5574
5575         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5576         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5577         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5578
5579         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5580         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5581         qinfo->conf.offloads = txq->offloads;
5582         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5583 }
5584
5585 /*
5586  * [VF] Initializes Receive Unit.
5587  */
5588 int __rte_cold
5589 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5590 {
5591         struct ixgbe_hw     *hw;
5592         struct ixgbe_rx_queue *rxq;
5593         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5594         uint64_t bus_addr;
5595         uint32_t srrctl, psrtype = 0;
5596         uint16_t buf_size;
5597         uint16_t i;
5598         int ret;
5599
5600         PMD_INIT_FUNC_TRACE();
5601         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5602
5603         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5604                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5605                         "it should be power of 2");
5606                 return -1;
5607         }
5608
5609         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5610                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5611                         "it should be equal to or less than %d",
5612                         hw->mac.max_rx_queues);
5613                 return -1;
5614         }
5615
5616         /*
5617          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5618          * disables the VF receipt of packets if the PF MTU is > 1500.
5619          * This is done to deal with 82599 limitations that imposes
5620          * the PF and all VFs to share the same MTU.
5621          * Then, the PF driver enables again the VF receipt of packet when
5622          * the VF driver issues a IXGBE_VF_SET_LPE request.
5623          * In the meantime, the VF device cannot be used, even if the VF driver
5624          * and the Guest VM network stack are ready to accept packets with a
5625          * size up to the PF MTU.
5626          * As a work-around to this PF behaviour, force the call to
5627          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5628          * VF packets received can work in all cases.
5629          */
5630         ixgbevf_rlpml_set_vf(hw,
5631                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5632
5633         /*
5634          * Assume no header split and no VLAN strip support
5635          * on any Rx queue first .
5636          */
5637         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5638         /* Setup RX queues */
5639         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5640                 rxq = dev->data->rx_queues[i];
5641
5642                 /* Allocate buffers for descriptor rings */
5643                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5644                 if (ret)
5645                         return ret;
5646
5647                 /* Setup the Base and Length of the Rx Descriptor Rings */
5648                 bus_addr = rxq->rx_ring_phys_addr;
5649
5650                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5651                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5652                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5653                                 (uint32_t)(bus_addr >> 32));
5654                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5655                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5656                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5657                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5658
5659
5660                 /* Configure the SRRCTL register */
5661                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5662
5663                 /* Set if packets are dropped when no descriptors available */
5664                 if (rxq->drop_en)
5665                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5666
5667                 /*
5668                  * Configure the RX buffer size in the BSIZEPACKET field of
5669                  * the SRRCTL register of the queue.
5670                  * The value is in 1 KB resolution. Valid values can be from
5671                  * 1 KB to 16 KB.
5672                  */
5673                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5674                         RTE_PKTMBUF_HEADROOM);
5675                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5676                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5677
5678                 /*
5679                  * VF modification to write virtual function SRRCTL register
5680                  */
5681                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5682
5683                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5684                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5685
5686                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5687                     /* It adds dual VLAN length for supporting dual VLAN */
5688                     (rxmode->max_rx_pkt_len +
5689                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5690                         if (!dev->data->scattered_rx)
5691                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5692                         dev->data->scattered_rx = 1;
5693                 }
5694
5695                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5696                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5697         }
5698
5699         /* Set RQPL for VF RSS according to max Rx queue */
5700         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5701                 IXGBE_PSRTYPE_RQPL_SHIFT;
5702         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5703
5704         ixgbe_set_rx_function(dev);
5705
5706         return 0;
5707 }
5708
5709 /*
5710  * [VF] Initializes Transmit Unit.
5711  */
5712 void __rte_cold
5713 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5714 {
5715         struct ixgbe_hw     *hw;
5716         struct ixgbe_tx_queue *txq;
5717         uint64_t bus_addr;
5718         uint32_t txctrl;
5719         uint16_t i;
5720
5721         PMD_INIT_FUNC_TRACE();
5722         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5723
5724         /* Setup the Base and Length of the Tx Descriptor Rings */
5725         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5726                 txq = dev->data->tx_queues[i];
5727                 bus_addr = txq->tx_ring_phys_addr;
5728                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5729                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5730                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5731                                 (uint32_t)(bus_addr >> 32));
5732                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5733                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5734                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5735                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5736                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5737
5738                 /*
5739                  * Disable Tx Head Writeback RO bit, since this hoses
5740                  * bookkeeping if things aren't delivered in order.
5741                  */
5742                 txctrl = IXGBE_READ_REG(hw,
5743                                 IXGBE_VFDCA_TXCTRL(i));
5744                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5745                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5746                                 txctrl);
5747         }
5748 }
5749
5750 /*
5751  * [VF] Start Transmit and Receive Units.
5752  */
5753 void __rte_cold
5754 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5755 {
5756         struct ixgbe_hw     *hw;
5757         struct ixgbe_tx_queue *txq;
5758         struct ixgbe_rx_queue *rxq;
5759         uint32_t txdctl;
5760         uint32_t rxdctl;
5761         uint16_t i;
5762         int poll_ms;
5763
5764         PMD_INIT_FUNC_TRACE();
5765         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5766
5767         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5768                 txq = dev->data->tx_queues[i];
5769                 /* Setup Transmit Threshold Registers */
5770                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5771                 txdctl |= txq->pthresh & 0x7F;
5772                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5773                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5774                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5775         }
5776
5777         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5778
5779                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5780                 txdctl |= IXGBE_TXDCTL_ENABLE;
5781                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5782
5783                 poll_ms = 10;
5784                 /* Wait until TX Enable ready */
5785                 do {
5786                         rte_delay_ms(1);
5787                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5788                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5789                 if (!poll_ms)
5790                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5791         }
5792         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5793
5794                 rxq = dev->data->rx_queues[i];
5795
5796                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5797                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5798                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5799
5800                 /* Wait until RX Enable ready */
5801                 poll_ms = 10;
5802                 do {
5803                         rte_delay_ms(1);
5804                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5805                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5806                 if (!poll_ms)
5807                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5808                 rte_wmb();
5809                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5810
5811         }
5812 }
5813
5814 int
5815 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5816                     const struct rte_flow_action_rss *in)
5817 {
5818         if (in->key_len > RTE_DIM(out->key) ||
5819             in->queue_num > RTE_DIM(out->queue))
5820                 return -EINVAL;
5821         out->conf = (struct rte_flow_action_rss){
5822                 .func = in->func,
5823                 .level = in->level,
5824                 .types = in->types,
5825                 .key_len = in->key_len,
5826                 .queue_num = in->queue_num,
5827                 .key = memcpy(out->key, in->key, in->key_len),
5828                 .queue = memcpy(out->queue, in->queue,
5829                                 sizeof(*in->queue) * in->queue_num),
5830         };
5831         return 0;
5832 }
5833
5834 int
5835 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5836                       const struct rte_flow_action_rss *with)
5837 {
5838         return (comp->func == with->func &&
5839                 comp->level == with->level &&
5840                 comp->types == with->types &&
5841                 comp->key_len == with->key_len &&
5842                 comp->queue_num == with->queue_num &&
5843                 !memcmp(comp->key, with->key, with->key_len) &&
5844                 !memcmp(comp->queue, with->queue,
5845                         sizeof(*with->queue) * with->queue_num));
5846 }
5847
5848 int
5849 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5850                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5851 {
5852         struct ixgbe_hw *hw;
5853         uint32_t reta;
5854         uint16_t i;
5855         uint16_t j;
5856         uint16_t sp_reta_size;
5857         uint32_t reta_reg;
5858         struct rte_eth_rss_conf rss_conf = {
5859                 .rss_key = conf->conf.key_len ?
5860                         (void *)(uintptr_t)conf->conf.key : NULL,
5861                 .rss_key_len = conf->conf.key_len,
5862                 .rss_hf = conf->conf.types,
5863         };
5864         struct ixgbe_filter_info *filter_info =
5865                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5866
5867         PMD_INIT_FUNC_TRACE();
5868         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5869
5870         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5871
5872         if (!add) {
5873                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5874                                           &conf->conf)) {
5875                         ixgbe_rss_disable(dev);
5876                         memset(&filter_info->rss_info, 0,
5877                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5878                         return 0;
5879                 }
5880                 return -EINVAL;
5881         }
5882
5883         if (filter_info->rss_info.conf.queue_num)
5884                 return -EINVAL;
5885         /* Fill in redirection table
5886          * The byte-swap is needed because NIC registers are in
5887          * little-endian order.
5888          */
5889         reta = 0;
5890         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5891                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5892
5893                 if (j == conf->conf.queue_num)
5894                         j = 0;
5895                 reta = (reta << 8) | conf->conf.queue[j];
5896                 if ((i & 3) == 3)
5897                         IXGBE_WRITE_REG(hw, reta_reg,
5898                                         rte_bswap32(reta));
5899         }
5900
5901         /* Configure the RSS key and the RSS protocols used to compute
5902          * the RSS hash of input packets.
5903          */
5904         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5905                 ixgbe_rss_disable(dev);
5906                 return 0;
5907         }
5908         if (rss_conf.rss_key == NULL)
5909                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5910         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5911
5912         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5913                 return -EINVAL;
5914
5915         return 0;
5916 }
5917
5918 /* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
5919 #if defined(RTE_ARCH_PPC_64)
5920 int
5921 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5922 {
5923         return -1;
5924 }
5925
5926 uint16_t
5927 ixgbe_recv_pkts_vec(
5928         void __rte_unused *rx_queue,
5929         struct rte_mbuf __rte_unused **rx_pkts,
5930         uint16_t __rte_unused nb_pkts)
5931 {
5932         return 0;
5933 }
5934
5935 uint16_t
5936 ixgbe_recv_scattered_pkts_vec(
5937         void __rte_unused *rx_queue,
5938         struct rte_mbuf __rte_unused **rx_pkts,
5939         uint16_t __rte_unused nb_pkts)
5940 {
5941         return 0;
5942 }
5943
5944 int
5945 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5946 {
5947         return -1;
5948 }
5949
5950 uint16_t
5951 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5952                 struct rte_mbuf __rte_unused **tx_pkts,
5953                 uint16_t __rte_unused nb_pkts)
5954 {
5955         return 0;
5956 }
5957
5958 int
5959 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5960 {
5961         return -1;
5962 }
5963
5964 void
5965 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5966 {
5967         return;
5968 }
5969 #endif