f41dc13d5eec5709442825a337058451a8be1d5a
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit Mask to indicate what bits required for building TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |     \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
76
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 #ifdef RTE_IXGBE_INC_VECTOR
91 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
92                                     uint16_t nb_pkts);
93 #endif
94
95 /*********************************************************************
96  *
97  *  TX functions
98  *
99  **********************************************************************/
100
101 /*
102  * Check for descriptors with their DD bit set and free mbufs.
103  * Return the total number of buffers freed.
104  */
105 static __rte_always_inline int
106 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
107 {
108         struct ixgbe_tx_entry *txep;
109         uint32_t status;
110         int i, nb_free = 0;
111         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
112
113         /* check DD bit on threshold descriptor */
114         status = txq->tx_ring[txq->tx_next_dd].wb.status;
115         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
116                 return 0;
117
118         /*
119          * first buffer to free from S/W ring is at index
120          * tx_next_dd - (tx_rs_thresh-1)
121          */
122         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
123
124         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
125                 /* free buffers one at a time */
126                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
127                 txep->mbuf = NULL;
128
129                 if (unlikely(m == NULL))
130                         continue;
131
132                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
133                     (nb_free > 0 && m->pool != free[0]->pool)) {
134                         rte_mempool_put_bulk(free[0]->pool,
135                                              (void **)free, nb_free);
136                         nb_free = 0;
137                 }
138
139                 free[nb_free++] = m;
140         }
141
142         if (nb_free > 0)
143                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
144
145         /* buffers were freed, update counters */
146         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
147         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
148         if (txq->tx_next_dd >= txq->nb_tx_desc)
149                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
150
151         return txq->tx_rs_thresh;
152 }
153
154 /* Populate 4 descriptors with data from 4 mbufs */
155 static inline void
156 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
157 {
158         uint64_t buf_dma_addr;
159         uint32_t pkt_len;
160         int i;
161
162         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
163                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
164                 pkt_len = (*pkts)->data_len;
165
166                 /* write data to descriptor */
167                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
168
169                 txdp->read.cmd_type_len =
170                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
171
172                 txdp->read.olinfo_status =
173                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
174
175                 rte_prefetch0(&(*pkts)->pool);
176         }
177 }
178
179 /* Populate 1 descriptor with data from 1 mbuf */
180 static inline void
181 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
182 {
183         uint64_t buf_dma_addr;
184         uint32_t pkt_len;
185
186         buf_dma_addr = rte_mbuf_data_iova(*pkts);
187         pkt_len = (*pkts)->data_len;
188
189         /* write data to descriptor */
190         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
191         txdp->read.cmd_type_len =
192                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
193         txdp->read.olinfo_status =
194                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
195         rte_prefetch0(&(*pkts)->pool);
196 }
197
198 /*
199  * Fill H/W descriptor ring with mbuf data.
200  * Copy mbuf pointers to the S/W ring.
201  */
202 static inline void
203 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
204                       uint16_t nb_pkts)
205 {
206         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
207         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
208         const int N_PER_LOOP = 4;
209         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
210         int mainpart, leftover;
211         int i, j;
212
213         /*
214          * Process most of the packets in chunks of N pkts.  Any
215          * leftover packets will get processed one at a time.
216          */
217         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
218         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
219         for (i = 0; i < mainpart; i += N_PER_LOOP) {
220                 /* Copy N mbuf pointers to the S/W ring */
221                 for (j = 0; j < N_PER_LOOP; ++j) {
222                         (txep + i + j)->mbuf = *(pkts + i + j);
223                 }
224                 tx4(txdp + i, pkts + i);
225         }
226
227         if (unlikely(leftover > 0)) {
228                 for (i = 0; i < leftover; ++i) {
229                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
230                         tx1(txdp + mainpart + i, pkts + mainpart + i);
231                 }
232         }
233 }
234
235 static inline uint16_t
236 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
237              uint16_t nb_pkts)
238 {
239         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
240         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
241         uint16_t n = 0;
242
243         /*
244          * Begin scanning the H/W ring for done descriptors when the
245          * number of available descriptors drops below tx_free_thresh.  For
246          * each done descriptor, free the associated buffer.
247          */
248         if (txq->nb_tx_free < txq->tx_free_thresh)
249                 ixgbe_tx_free_bufs(txq);
250
251         /* Only use descriptors that are available */
252         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
253         if (unlikely(nb_pkts == 0))
254                 return 0;
255
256         /* Use exactly nb_pkts descriptors */
257         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
258
259         /*
260          * At this point, we know there are enough descriptors in the
261          * ring to transmit all the packets.  This assumes that each
262          * mbuf contains a single segment, and that no new offloads
263          * are expected, which would require a new context descriptor.
264          */
265
266         /*
267          * See if we're going to wrap-around. If so, handle the top
268          * of the descriptor ring first, then do the bottom.  If not,
269          * the processing looks just like the "bottom" part anyway...
270          */
271         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
272                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
273                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
274
275                 /*
276                  * We know that the last descriptor in the ring will need to
277                  * have its RS bit set because tx_rs_thresh has to be
278                  * a divisor of the ring size
279                  */
280                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
281                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
282                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
283
284                 txq->tx_tail = 0;
285         }
286
287         /* Fill H/W descriptor ring with mbuf data */
288         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
289         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
290
291         /*
292          * Determine if RS bit should be set
293          * This is what we actually want:
294          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
295          * but instead of subtracting 1 and doing >=, we can just do
296          * greater than without subtracting.
297          */
298         if (txq->tx_tail > txq->tx_next_rs) {
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
302                                                 txq->tx_rs_thresh);
303                 if (txq->tx_next_rs >= txq->nb_tx_desc)
304                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
305         }
306
307         /*
308          * Check for wrap-around. This would only happen if we used
309          * up to the last descriptor in the ring, no more, no less.
310          */
311         if (txq->tx_tail >= txq->nb_tx_desc)
312                 txq->tx_tail = 0;
313
314         /* update tail pointer */
315         rte_wmb();
316         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
317
318         return nb_pkts;
319 }
320
321 uint16_t
322 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
323                        uint16_t nb_pkts)
324 {
325         uint16_t nb_tx;
326
327         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
328         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
329                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
330
331         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
332         nb_tx = 0;
333         while (nb_pkts) {
334                 uint16_t ret, n;
335
336                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
337                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
338                 nb_tx = (uint16_t)(nb_tx + ret);
339                 nb_pkts = (uint16_t)(nb_pkts - ret);
340                 if (ret < n)
341                         break;
342         }
343
344         return nb_tx;
345 }
346
347 #ifdef RTE_IXGBE_INC_VECTOR
348 static uint16_t
349 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
350                     uint16_t nb_pkts)
351 {
352         uint16_t nb_tx = 0;
353         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
354
355         while (nb_pkts) {
356                 uint16_t ret, num;
357
358                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
359                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
360                                                  num);
361                 nb_tx += ret;
362                 nb_pkts -= ret;
363                 if (ret < num)
364                         break;
365         }
366
367         return nb_tx;
368 }
369 #endif
370
371 static inline void
372 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
373                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
374                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
375                 __rte_unused uint64_t *mdata)
376 {
377         uint32_t type_tucmd_mlhl;
378         uint32_t mss_l4len_idx = 0;
379         uint32_t ctx_idx;
380         uint32_t vlan_macip_lens;
381         union ixgbe_tx_offload tx_offload_mask;
382         uint32_t seqnum_seed = 0;
383
384         ctx_idx = txq->ctx_curr;
385         tx_offload_mask.data[0] = 0;
386         tx_offload_mask.data[1] = 0;
387         type_tucmd_mlhl = 0;
388
389         /* Specify which HW CTX to upload. */
390         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
391
392         if (ol_flags & PKT_TX_VLAN_PKT) {
393                 tx_offload_mask.vlan_tci |= ~0;
394         }
395
396         /* check if TCP segmentation required for this packet */
397         if (ol_flags & PKT_TX_TCP_SEG) {
398                 /* implies IP cksum in IPv4 */
399                 if (ol_flags & PKT_TX_IP_CKSUM)
400                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
401                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
402                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
403                 else
404                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
405                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
406                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
407
408                 tx_offload_mask.l2_len |= ~0;
409                 tx_offload_mask.l3_len |= ~0;
410                 tx_offload_mask.l4_len |= ~0;
411                 tx_offload_mask.tso_segsz |= ~0;
412                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
413                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
414         } else { /* no TSO, check if hardware checksum is needed */
415                 if (ol_flags & PKT_TX_IP_CKSUM) {
416                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
417                         tx_offload_mask.l2_len |= ~0;
418                         tx_offload_mask.l3_len |= ~0;
419                 }
420
421                 switch (ol_flags & PKT_TX_L4_MASK) {
422                 case PKT_TX_UDP_CKSUM:
423                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
424                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
425                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
426                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 case PKT_TX_TCP_CKSUM:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
434                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
435                         tx_offload_mask.l2_len |= ~0;
436                         tx_offload_mask.l3_len |= ~0;
437                         break;
438                 case PKT_TX_SCTP_CKSUM:
439                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
440                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
441                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
442                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
443                         tx_offload_mask.l2_len |= ~0;
444                         tx_offload_mask.l3_len |= ~0;
445                         break;
446                 default:
447                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
448                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
449                         break;
450                 }
451         }
452
453         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
454                 tx_offload_mask.outer_l2_len |= ~0;
455                 tx_offload_mask.outer_l3_len |= ~0;
456                 tx_offload_mask.l2_len |= ~0;
457                 seqnum_seed |= tx_offload.outer_l3_len
458                                << IXGBE_ADVTXD_OUTER_IPLEN;
459                 seqnum_seed |= tx_offload.l2_len
460                                << IXGBE_ADVTXD_TUNNEL_LEN;
461         }
462 #ifdef RTE_LIBRTE_SECURITY
463         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
464                 union ixgbe_crypto_tx_desc_md *md =
465                                 (union ixgbe_crypto_tx_desc_md *)mdata;
466                 seqnum_seed |=
467                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
468                 type_tucmd_mlhl |= md->enc ?
469                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
470                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
471                 type_tucmd_mlhl |=
472                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
473                 tx_offload_mask.sa_idx |= ~0;
474                 tx_offload_mask.sec_pad_len |= ~0;
475         }
476 #endif
477
478         txq->ctx_cache[ctx_idx].flags = ol_flags;
479         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
480                 tx_offload_mask.data[0] & tx_offload.data[0];
481         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
482                 tx_offload_mask.data[1] & tx_offload.data[1];
483         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
484
485         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
486         vlan_macip_lens = tx_offload.l3_len;
487         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
488                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
489                                     IXGBE_ADVTXD_MACLEN_SHIFT);
490         else
491                 vlan_macip_lens |= (tx_offload.l2_len <<
492                                     IXGBE_ADVTXD_MACLEN_SHIFT);
493         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
494         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
495         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
496         ctx_txd->seqnum_seed     = seqnum_seed;
497 }
498
499 /*
500  * Check which hardware context can be used. Use the existing match
501  * or create a new context descriptor.
502  */
503 static inline uint32_t
504 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
505                    union ixgbe_tx_offload tx_offload)
506 {
507         /* If match with the current used context */
508         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
509                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
510                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
511                      & tx_offload.data[0])) &&
512                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
513                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
514                      & tx_offload.data[1]))))
515                 return txq->ctx_curr;
516
517         /* What if match with the next context  */
518         txq->ctx_curr ^= 1;
519         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
520                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
521                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
522                      & tx_offload.data[0])) &&
523                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
524                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
525                      & tx_offload.data[1]))))
526                 return txq->ctx_curr;
527
528         /* Mismatch, use the previous context */
529         return IXGBE_CTX_NUM;
530 }
531
532 static inline uint32_t
533 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
534 {
535         uint32_t tmp = 0;
536
537         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
538                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
539         if (ol_flags & PKT_TX_IP_CKSUM)
540                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
541         if (ol_flags & PKT_TX_TCP_SEG)
542                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
543         return tmp;
544 }
545
546 static inline uint32_t
547 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
548 {
549         uint32_t cmdtype = 0;
550
551         if (ol_flags & PKT_TX_VLAN_PKT)
552                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
553         if (ol_flags & PKT_TX_TCP_SEG)
554                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
555         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
556                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
557         if (ol_flags & PKT_TX_MACSEC)
558                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
559         return cmdtype;
560 }
561
562 /* Default RS bit threshold values */
563 #ifndef DEFAULT_TX_RS_THRESH
564 #define DEFAULT_TX_RS_THRESH   32
565 #endif
566 #ifndef DEFAULT_TX_FREE_THRESH
567 #define DEFAULT_TX_FREE_THRESH 32
568 #endif
569
570 /* Reset transmit descriptors after they have been used */
571 static inline int
572 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
573 {
574         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
575         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
576         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
577         uint16_t nb_tx_desc = txq->nb_tx_desc;
578         uint16_t desc_to_clean_to;
579         uint16_t nb_tx_to_clean;
580         uint32_t status;
581
582         /* Determine the last descriptor needing to be cleaned */
583         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
584         if (desc_to_clean_to >= nb_tx_desc)
585                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
586
587         /* Check to make sure the last descriptor to clean is done */
588         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
589         status = txr[desc_to_clean_to].wb.status;
590         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
591                 PMD_TX_FREE_LOG(DEBUG,
592                                 "TX descriptor %4u is not done"
593                                 "(port=%d queue=%d)",
594                                 desc_to_clean_to,
595                                 txq->port_id, txq->queue_id);
596                 /* Failed to clean any descriptors, better luck next time */
597                 return -(1);
598         }
599
600         /* Figure out how many descriptors will be cleaned */
601         if (last_desc_cleaned > desc_to_clean_to)
602                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
603                                                         desc_to_clean_to);
604         else
605                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
606                                                 last_desc_cleaned);
607
608         PMD_TX_FREE_LOG(DEBUG,
609                         "Cleaning %4u TX descriptors: %4u to %4u "
610                         "(port=%d queue=%d)",
611                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
612                         txq->port_id, txq->queue_id);
613
614         /*
615          * The last descriptor to clean is done, so that means all the
616          * descriptors from the last descriptor that was cleaned
617          * up to the last descriptor with the RS bit set
618          * are done. Only reset the threshold descriptor.
619          */
620         txr[desc_to_clean_to].wb.status = 0;
621
622         /* Update the txq to reflect the last descriptor that was cleaned */
623         txq->last_desc_cleaned = desc_to_clean_to;
624         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
625
626         /* No Error */
627         return 0;
628 }
629
630 uint16_t
631 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
632                 uint16_t nb_pkts)
633 {
634         struct ixgbe_tx_queue *txq;
635         struct ixgbe_tx_entry *sw_ring;
636         struct ixgbe_tx_entry *txe, *txn;
637         volatile union ixgbe_adv_tx_desc *txr;
638         volatile union ixgbe_adv_tx_desc *txd, *txp;
639         struct rte_mbuf     *tx_pkt;
640         struct rte_mbuf     *m_seg;
641         uint64_t buf_dma_addr;
642         uint32_t olinfo_status;
643         uint32_t cmd_type_len;
644         uint32_t pkt_len;
645         uint16_t slen;
646         uint64_t ol_flags;
647         uint16_t tx_id;
648         uint16_t tx_last;
649         uint16_t nb_tx;
650         uint16_t nb_used;
651         uint64_t tx_ol_req;
652         uint32_t ctx = 0;
653         uint32_t new_ctx;
654         union ixgbe_tx_offload tx_offload;
655 #ifdef RTE_LIBRTE_SECURITY
656         uint8_t use_ipsec;
657 #endif
658
659         tx_offload.data[0] = 0;
660         tx_offload.data[1] = 0;
661         txq = tx_queue;
662         sw_ring = txq->sw_ring;
663         txr     = txq->tx_ring;
664         tx_id   = txq->tx_tail;
665         txe = &sw_ring[tx_id];
666         txp = NULL;
667
668         /* Determine if the descriptor ring needs to be cleaned. */
669         if (txq->nb_tx_free < txq->tx_free_thresh)
670                 ixgbe_xmit_cleanup(txq);
671
672         rte_prefetch0(&txe->mbuf->pool);
673
674         /* TX loop */
675         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
676                 new_ctx = 0;
677                 tx_pkt = *tx_pkts++;
678                 pkt_len = tx_pkt->pkt_len;
679
680                 /*
681                  * Determine how many (if any) context descriptors
682                  * are needed for offload functionality.
683                  */
684                 ol_flags = tx_pkt->ol_flags;
685 #ifdef RTE_LIBRTE_SECURITY
686                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
687 #endif
688
689                 /* If hardware offload required */
690                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
691                 if (tx_ol_req) {
692                         tx_offload.l2_len = tx_pkt->l2_len;
693                         tx_offload.l3_len = tx_pkt->l3_len;
694                         tx_offload.l4_len = tx_pkt->l4_len;
695                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
696                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
697                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
698                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
699 #ifdef RTE_LIBRTE_SECURITY
700                         if (use_ipsec) {
701                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
702                                         (union ixgbe_crypto_tx_desc_md *)
703                                                         &tx_pkt->udata64;
704                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
705                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
706                         }
707 #endif
708
709                         /* If new context need be built or reuse the exist ctx. */
710                         ctx = what_advctx_update(txq, tx_ol_req,
711                                 tx_offload);
712                         /* Only allocate context descriptor if required*/
713                         new_ctx = (ctx == IXGBE_CTX_NUM);
714                         ctx = txq->ctx_curr;
715                 }
716
717                 /*
718                  * Keep track of how many descriptors are used this loop
719                  * This will always be the number of segments + the number of
720                  * Context descriptors required to transmit the packet
721                  */
722                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
723
724                 if (txp != NULL &&
725                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
726                         /* set RS on the previous packet in the burst */
727                         txp->read.cmd_type_len |=
728                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
729
730                 /*
731                  * The number of descriptors that must be allocated for a
732                  * packet is the number of segments of that packet, plus 1
733                  * Context Descriptor for the hardware offload, if any.
734                  * Determine the last TX descriptor to allocate in the TX ring
735                  * for the packet, starting from the current position (tx_id)
736                  * in the ring.
737                  */
738                 tx_last = (uint16_t) (tx_id + nb_used - 1);
739
740                 /* Circular ring */
741                 if (tx_last >= txq->nb_tx_desc)
742                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
743
744                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
745                            " tx_first=%u tx_last=%u",
746                            (unsigned) txq->port_id,
747                            (unsigned) txq->queue_id,
748                            (unsigned) pkt_len,
749                            (unsigned) tx_id,
750                            (unsigned) tx_last);
751
752                 /*
753                  * Make sure there are enough TX descriptors available to
754                  * transmit the entire packet.
755                  * nb_used better be less than or equal to txq->tx_rs_thresh
756                  */
757                 if (nb_used > txq->nb_tx_free) {
758                         PMD_TX_FREE_LOG(DEBUG,
759                                         "Not enough free TX descriptors "
760                                         "nb_used=%4u nb_free=%4u "
761                                         "(port=%d queue=%d)",
762                                         nb_used, txq->nb_tx_free,
763                                         txq->port_id, txq->queue_id);
764
765                         if (ixgbe_xmit_cleanup(txq) != 0) {
766                                 /* Could not clean any descriptors */
767                                 if (nb_tx == 0)
768                                         return 0;
769                                 goto end_of_tx;
770                         }
771
772                         /* nb_used better be <= txq->tx_rs_thresh */
773                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
774                                 PMD_TX_FREE_LOG(DEBUG,
775                                         "The number of descriptors needed to "
776                                         "transmit the packet exceeds the "
777                                         "RS bit threshold. This will impact "
778                                         "performance."
779                                         "nb_used=%4u nb_free=%4u "
780                                         "tx_rs_thresh=%4u. "
781                                         "(port=%d queue=%d)",
782                                         nb_used, txq->nb_tx_free,
783                                         txq->tx_rs_thresh,
784                                         txq->port_id, txq->queue_id);
785                                 /*
786                                  * Loop here until there are enough TX
787                                  * descriptors or until the ring cannot be
788                                  * cleaned.
789                                  */
790                                 while (nb_used > txq->nb_tx_free) {
791                                         if (ixgbe_xmit_cleanup(txq) != 0) {
792                                                 /*
793                                                  * Could not clean any
794                                                  * descriptors
795                                                  */
796                                                 if (nb_tx == 0)
797                                                         return 0;
798                                                 goto end_of_tx;
799                                         }
800                                 }
801                         }
802                 }
803
804                 /*
805                  * By now there are enough free TX descriptors to transmit
806                  * the packet.
807                  */
808
809                 /*
810                  * Set common flags of all TX Data Descriptors.
811                  *
812                  * The following bits must be set in all Data Descriptors:
813                  *   - IXGBE_ADVTXD_DTYP_DATA
814                  *   - IXGBE_ADVTXD_DCMD_DEXT
815                  *
816                  * The following bits must be set in the first Data Descriptor
817                  * and are ignored in the other ones:
818                  *   - IXGBE_ADVTXD_DCMD_IFCS
819                  *   - IXGBE_ADVTXD_MAC_1588
820                  *   - IXGBE_ADVTXD_DCMD_VLE
821                  *
822                  * The following bits must only be set in the last Data
823                  * Descriptor:
824                  *   - IXGBE_TXD_CMD_EOP
825                  *
826                  * The following bits can be set in any Data Descriptor, but
827                  * are only set in the last Data Descriptor:
828                  *   - IXGBE_TXD_CMD_RS
829                  */
830                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
831                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
832
833 #ifdef RTE_LIBRTE_IEEE1588
834                 if (ol_flags & PKT_TX_IEEE1588_TMST)
835                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
836 #endif
837
838                 olinfo_status = 0;
839                 if (tx_ol_req) {
840
841                         if (ol_flags & PKT_TX_TCP_SEG) {
842                                 /* when TSO is on, paylen in descriptor is the
843                                  * not the packet len but the tcp payload len */
844                                 pkt_len -= (tx_offload.l2_len +
845                                         tx_offload.l3_len + tx_offload.l4_len);
846                         }
847
848                         /*
849                          * Setup the TX Advanced Context Descriptor if required
850                          */
851                         if (new_ctx) {
852                                 volatile struct ixgbe_adv_tx_context_desc *
853                                     ctx_txd;
854
855                                 ctx_txd = (volatile struct
856                                     ixgbe_adv_tx_context_desc *)
857                                     &txr[tx_id];
858
859                                 txn = &sw_ring[txe->next_id];
860                                 rte_prefetch0(&txn->mbuf->pool);
861
862                                 if (txe->mbuf != NULL) {
863                                         rte_pktmbuf_free_seg(txe->mbuf);
864                                         txe->mbuf = NULL;
865                                 }
866
867                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
868                                         tx_offload, &tx_pkt->udata64);
869
870                                 txe->last_id = tx_last;
871                                 tx_id = txe->next_id;
872                                 txe = txn;
873                         }
874
875                         /*
876                          * Setup the TX Advanced Data Descriptor,
877                          * This path will go through
878                          * whatever new/reuse the context descriptor
879                          */
880                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
881                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
882                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
883                 }
884
885                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
886 #ifdef RTE_LIBRTE_SECURITY
887                 if (use_ipsec)
888                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
889 #endif
890
891                 m_seg = tx_pkt;
892                 do {
893                         txd = &txr[tx_id];
894                         txn = &sw_ring[txe->next_id];
895                         rte_prefetch0(&txn->mbuf->pool);
896
897                         if (txe->mbuf != NULL)
898                                 rte_pktmbuf_free_seg(txe->mbuf);
899                         txe->mbuf = m_seg;
900
901                         /*
902                          * Set up Transmit Data Descriptor.
903                          */
904                         slen = m_seg->data_len;
905                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
906                         txd->read.buffer_addr =
907                                 rte_cpu_to_le_64(buf_dma_addr);
908                         txd->read.cmd_type_len =
909                                 rte_cpu_to_le_32(cmd_type_len | slen);
910                         txd->read.olinfo_status =
911                                 rte_cpu_to_le_32(olinfo_status);
912                         txe->last_id = tx_last;
913                         tx_id = txe->next_id;
914                         txe = txn;
915                         m_seg = m_seg->next;
916                 } while (m_seg != NULL);
917
918                 /*
919                  * The last packet data descriptor needs End Of Packet (EOP)
920                  */
921                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
922                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
923                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
924
925                 /* Set RS bit only on threshold packets' last descriptor */
926                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
927                         PMD_TX_FREE_LOG(DEBUG,
928                                         "Setting RS bit on TXD id="
929                                         "%4u (port=%d queue=%d)",
930                                         tx_last, txq->port_id, txq->queue_id);
931
932                         cmd_type_len |= IXGBE_TXD_CMD_RS;
933
934                         /* Update txq RS bit counters */
935                         txq->nb_tx_used = 0;
936                         txp = NULL;
937                 } else
938                         txp = txd;
939
940                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
941         }
942
943 end_of_tx:
944         /* set RS on last packet in the burst */
945         if (txp != NULL)
946                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
947
948         rte_wmb();
949
950         /*
951          * Set the Transmit Descriptor Tail (TDT)
952          */
953         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
954                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
955                    (unsigned) tx_id, (unsigned) nb_tx);
956         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
957         txq->tx_tail = tx_id;
958
959         return nb_tx;
960 }
961
962 /*********************************************************************
963  *
964  *  TX prep functions
965  *
966  **********************************************************************/
967 uint16_t
968 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
969 {
970         int i, ret;
971         uint64_t ol_flags;
972         struct rte_mbuf *m;
973         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
974
975         for (i = 0; i < nb_pkts; i++) {
976                 m = tx_pkts[i];
977                 ol_flags = m->ol_flags;
978
979                 /**
980                  * Check if packet meets requirements for number of segments
981                  *
982                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
983                  *       non-TSO
984                  */
985
986                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
987                         rte_errno = EINVAL;
988                         return i;
989                 }
990
991                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
992                         rte_errno = ENOTSUP;
993                         return i;
994                 }
995
996 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
997                 ret = rte_validate_tx_offload(m);
998                 if (ret != 0) {
999                         rte_errno = -ret;
1000                         return i;
1001                 }
1002 #endif
1003                 ret = rte_net_intel_cksum_prepare(m);
1004                 if (ret != 0) {
1005                         rte_errno = -ret;
1006                         return i;
1007                 }
1008         }
1009
1010         return i;
1011 }
1012
1013 /*********************************************************************
1014  *
1015  *  RX functions
1016  *
1017  **********************************************************************/
1018
1019 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1020 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1021 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1022 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1023 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1028 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1029 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1030 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1031 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1052
1053 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1076
1077 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1100
1101 /**
1102  * Use 2 different table for normal packet and tunnel packet
1103  * to save the space.
1104  */
1105 const uint32_t
1106         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1107         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1108         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1109                 RTE_PTYPE_L3_IPV4,
1110         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1112         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1114         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1116         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4_EXT,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1124         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV6,
1126         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1128         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1130         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1132         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6_EXT,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1140         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1142                 RTE_PTYPE_INNER_L3_IPV6,
1143         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1144                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1145                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1146         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1148         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1149         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1150                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1151                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1152         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1153                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1154                 RTE_PTYPE_INNER_L3_IPV6,
1155         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1156                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1157                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1158         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1159                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1160                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1161         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1162                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1163                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1164         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1165                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1166                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1167         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1168                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1169                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1170         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1171                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1172                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1173         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1174                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1175                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1176         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1178                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1179         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1181                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1182         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1183                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1184                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1185         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1186                 RTE_PTYPE_L2_ETHER |
1187                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1188                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1189 };
1190
1191 const uint32_t
1192         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1193         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1194                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                 RTE_PTYPE_INNER_L2_ETHER,
1196         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1197                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1199         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1200                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1201                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1202         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1203                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1205         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1206                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1208         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1214         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1215                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1216                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1217                 RTE_PTYPE_INNER_L4_TCP,
1218         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1219                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1220                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1221                 RTE_PTYPE_INNER_L4_TCP,
1222         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1223                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1224                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1226                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1228                 RTE_PTYPE_INNER_L4_TCP,
1229         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1230                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1231                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1232                 RTE_PTYPE_INNER_L3_IPV4,
1233         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1234                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1236                 RTE_PTYPE_INNER_L4_UDP,
1237         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1240                 RTE_PTYPE_INNER_L4_UDP,
1241         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1242                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1243                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1244                 RTE_PTYPE_INNER_L4_SCTP,
1245         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1246                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1248         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1249                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1250                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1251                 RTE_PTYPE_INNER_L4_UDP,
1252         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1253                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1254                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1255                 RTE_PTYPE_INNER_L4_SCTP,
1256         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1257                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1258                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1259                 RTE_PTYPE_INNER_L3_IPV4,
1260         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1261                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1262                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1263                 RTE_PTYPE_INNER_L4_SCTP,
1264         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1265                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1266                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1267                 RTE_PTYPE_INNER_L4_SCTP,
1268         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1269                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1270                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1271                 RTE_PTYPE_INNER_L4_TCP,
1272         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1273                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1274                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1275                 RTE_PTYPE_INNER_L4_UDP,
1276
1277         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1278                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1280         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1281                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1282                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1283                 RTE_PTYPE_INNER_L3_IPV4,
1284         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1285                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1286                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1287                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1288         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1289                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1290                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1291                 RTE_PTYPE_INNER_L3_IPV6,
1292         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1293                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1294                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1295                 RTE_PTYPE_INNER_L3_IPV4,
1296         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1297                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1298                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1299                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1300         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1301                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1302                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1303                 RTE_PTYPE_INNER_L3_IPV4,
1304         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1305                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1306                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1307                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1308         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1309                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1310                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1311                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1312         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1313                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1314                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1315                 RTE_PTYPE_INNER_L3_IPV4,
1316         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1317                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1318                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1319                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1320         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1321                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1322                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1323                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1324         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1325                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1326                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1327                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1328         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1329                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1330                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1331                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1332         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1333                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1334                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1335                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1336         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1337                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1338                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1339                 RTE_PTYPE_INNER_L3_IPV4,
1340         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1341                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1342                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1343                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1344         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1345                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1346                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1347                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1348         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1349                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1350                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1351                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1352         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1353                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1354                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1355                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1356         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1357                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1358                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1359                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1360         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1361                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1362                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1363                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1364         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1365                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1366                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1367                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1368 };
1369
1370 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1371 static inline uint32_t
1372 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1373 {
1374
1375         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1376                 return RTE_PTYPE_UNKNOWN;
1377
1378         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1379
1380         /* For tunnel packet */
1381         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1382                 /* Remove the tunnel bit to save the space. */
1383                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1384                 return ptype_table_tn[pkt_info];
1385         }
1386
1387         /**
1388          * For x550, if it's not tunnel,
1389          * tunnel type bit should be set to 0.
1390          * Reuse 82599's mask.
1391          */
1392         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1393
1394         return ptype_table[pkt_info];
1395 }
1396
1397 static inline uint64_t
1398 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1399 {
1400         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1401                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1402                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1403                 PKT_RX_RSS_HASH, 0, 0, 0,
1404                 0, 0, 0,  PKT_RX_FDIR,
1405         };
1406 #ifdef RTE_LIBRTE_IEEE1588
1407         static uint64_t ip_pkt_etqf_map[8] = {
1408                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1409                 0, 0, 0, 0,
1410         };
1411
1412         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1413                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1414                                 ip_rss_types_map[pkt_info & 0XF];
1415         else
1416                 return ip_rss_types_map[pkt_info & 0XF];
1417 #else
1418         return ip_rss_types_map[pkt_info & 0XF];
1419 #endif
1420 }
1421
1422 static inline uint64_t
1423 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1424 {
1425         uint64_t pkt_flags;
1426
1427         /*
1428          * Check if VLAN present only.
1429          * Do not check whether L3/L4 rx checksum done by NIC or not,
1430          * That can be found from rte_eth_rxmode.offloads flag
1431          */
1432         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1433
1434 #ifdef RTE_LIBRTE_IEEE1588
1435         if (rx_status & IXGBE_RXD_STAT_TMST)
1436                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1437 #endif
1438         return pkt_flags;
1439 }
1440
1441 static inline uint64_t
1442 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1443 {
1444         uint64_t pkt_flags;
1445
1446         /*
1447          * Bit 31: IPE, IPv4 checksum error
1448          * Bit 30: L4I, L4I integrity error
1449          */
1450         static uint64_t error_to_pkt_flags_map[4] = {
1451                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1452                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1453                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1454                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1455         };
1456         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1457                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1458
1459         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1460             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1461                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1462         }
1463
1464 #ifdef RTE_LIBRTE_SECURITY
1465         if (rx_status & IXGBE_RXD_STAT_SECP) {
1466                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1467                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1468                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1469         }
1470 #endif
1471
1472         return pkt_flags;
1473 }
1474
1475 /*
1476  * LOOK_AHEAD defines how many desc statuses to check beyond the
1477  * current descriptor.
1478  * It must be a pound define for optimal performance.
1479  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1480  * function only works with LOOK_AHEAD=8.
1481  */
1482 #define LOOK_AHEAD 8
1483 #if (LOOK_AHEAD != 8)
1484 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1485 #endif
1486 static inline int
1487 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1488 {
1489         volatile union ixgbe_adv_rx_desc *rxdp;
1490         struct ixgbe_rx_entry *rxep;
1491         struct rte_mbuf *mb;
1492         uint16_t pkt_len;
1493         uint64_t pkt_flags;
1494         int nb_dd;
1495         uint32_t s[LOOK_AHEAD];
1496         uint32_t pkt_info[LOOK_AHEAD];
1497         int i, j, nb_rx = 0;
1498         uint32_t status;
1499         uint64_t vlan_flags = rxq->vlan_flags;
1500
1501         /* get references to current descriptor and S/W ring entry */
1502         rxdp = &rxq->rx_ring[rxq->rx_tail];
1503         rxep = &rxq->sw_ring[rxq->rx_tail];
1504
1505         status = rxdp->wb.upper.status_error;
1506         /* check to make sure there is at least 1 packet to receive */
1507         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1508                 return 0;
1509
1510         /*
1511          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1512          * reference packets that are ready to be received.
1513          */
1514         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1515              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1516                 /* Read desc statuses backwards to avoid race condition */
1517                 for (j = 0; j < LOOK_AHEAD; j++)
1518                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1519
1520                 rte_smp_rmb();
1521
1522                 /* Compute how many status bits were set */
1523                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1524                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1525                         ;
1526
1527                 for (j = 0; j < nb_dd; j++)
1528                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1529                                                        lo_dword.data);
1530
1531                 nb_rx += nb_dd;
1532
1533                 /* Translate descriptor info to mbuf format */
1534                 for (j = 0; j < nb_dd; ++j) {
1535                         mb = rxep[j].mbuf;
1536                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1537                                   rxq->crc_len;
1538                         mb->data_len = pkt_len;
1539                         mb->pkt_len = pkt_len;
1540                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1541
1542                         /* convert descriptor fields to rte mbuf flags */
1543                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1544                                 vlan_flags);
1545                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1546                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1547                                         ((uint16_t)pkt_info[j]);
1548                         mb->ol_flags = pkt_flags;
1549                         mb->packet_type =
1550                                 ixgbe_rxd_pkt_info_to_pkt_type
1551                                         (pkt_info[j], rxq->pkt_type_mask);
1552
1553                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1554                                 mb->hash.rss = rte_le_to_cpu_32(
1555                                     rxdp[j].wb.lower.hi_dword.rss);
1556                         else if (pkt_flags & PKT_RX_FDIR) {
1557                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1558                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1559                                     IXGBE_ATR_HASH_MASK;
1560                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1561                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1562                         }
1563                 }
1564
1565                 /* Move mbuf pointers from the S/W ring to the stage */
1566                 for (j = 0; j < LOOK_AHEAD; ++j) {
1567                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1568                 }
1569
1570                 /* stop if all requested packets could not be received */
1571                 if (nb_dd != LOOK_AHEAD)
1572                         break;
1573         }
1574
1575         /* clear software ring entries so we can cleanup correctly */
1576         for (i = 0; i < nb_rx; ++i) {
1577                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1578         }
1579
1580
1581         return nb_rx;
1582 }
1583
1584 static inline int
1585 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1586 {
1587         volatile union ixgbe_adv_rx_desc *rxdp;
1588         struct ixgbe_rx_entry *rxep;
1589         struct rte_mbuf *mb;
1590         uint16_t alloc_idx;
1591         __le64 dma_addr;
1592         int diag, i;
1593
1594         /* allocate buffers in bulk directly into the S/W ring */
1595         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1596         rxep = &rxq->sw_ring[alloc_idx];
1597         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1598                                     rxq->rx_free_thresh);
1599         if (unlikely(diag != 0))
1600                 return -ENOMEM;
1601
1602         rxdp = &rxq->rx_ring[alloc_idx];
1603         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1604                 /* populate the static rte mbuf fields */
1605                 mb = rxep[i].mbuf;
1606                 if (reset_mbuf) {
1607                         mb->port = rxq->port_id;
1608                 }
1609
1610                 rte_mbuf_refcnt_set(mb, 1);
1611                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1612
1613                 /* populate the descriptors */
1614                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1615                 rxdp[i].read.hdr_addr = 0;
1616                 rxdp[i].read.pkt_addr = dma_addr;
1617         }
1618
1619         /* update state of internal queue structure */
1620         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1621         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1622                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1623
1624         /* no errors */
1625         return 0;
1626 }
1627
1628 static inline uint16_t
1629 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1630                          uint16_t nb_pkts)
1631 {
1632         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1633         int i;
1634
1635         /* how many packets are ready to return? */
1636         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1637
1638         /* copy mbuf pointers to the application's packet list */
1639         for (i = 0; i < nb_pkts; ++i)
1640                 rx_pkts[i] = stage[i];
1641
1642         /* update internal queue state */
1643         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1644         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1645
1646         return nb_pkts;
1647 }
1648
1649 static inline uint16_t
1650 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1651              uint16_t nb_pkts)
1652 {
1653         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1654         uint16_t nb_rx = 0;
1655
1656         /* Any previously recv'd pkts will be returned from the Rx stage */
1657         if (rxq->rx_nb_avail)
1658                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1659
1660         /* Scan the H/W ring for packets to receive */
1661         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1662
1663         /* update internal queue state */
1664         rxq->rx_next_avail = 0;
1665         rxq->rx_nb_avail = nb_rx;
1666         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1667
1668         /* if required, allocate new buffers to replenish descriptors */
1669         if (rxq->rx_tail > rxq->rx_free_trigger) {
1670                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1671
1672                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1673                         int i, j;
1674
1675                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1676                                    "queue_id=%u", (unsigned) rxq->port_id,
1677                                    (unsigned) rxq->queue_id);
1678
1679                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1680                                 rxq->rx_free_thresh;
1681
1682                         /*
1683                          * Need to rewind any previous receives if we cannot
1684                          * allocate new buffers to replenish the old ones.
1685                          */
1686                         rxq->rx_nb_avail = 0;
1687                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1688                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1689                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1690
1691                         return 0;
1692                 }
1693
1694                 /* update tail pointer */
1695                 rte_wmb();
1696                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1697                                             cur_free_trigger);
1698         }
1699
1700         if (rxq->rx_tail >= rxq->nb_rx_desc)
1701                 rxq->rx_tail = 0;
1702
1703         /* received any packets this loop? */
1704         if (rxq->rx_nb_avail)
1705                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1706
1707         return 0;
1708 }
1709
1710 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1711 uint16_t
1712 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1713                            uint16_t nb_pkts)
1714 {
1715         uint16_t nb_rx;
1716
1717         if (unlikely(nb_pkts == 0))
1718                 return 0;
1719
1720         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1721                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1722
1723         /* request is relatively large, chunk it up */
1724         nb_rx = 0;
1725         while (nb_pkts) {
1726                 uint16_t ret, n;
1727
1728                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1729                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1730                 nb_rx = (uint16_t)(nb_rx + ret);
1731                 nb_pkts = (uint16_t)(nb_pkts - ret);
1732                 if (ret < n)
1733                         break;
1734         }
1735
1736         return nb_rx;
1737 }
1738
1739 uint16_t
1740 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1741                 uint16_t nb_pkts)
1742 {
1743         struct ixgbe_rx_queue *rxq;
1744         volatile union ixgbe_adv_rx_desc *rx_ring;
1745         volatile union ixgbe_adv_rx_desc *rxdp;
1746         struct ixgbe_rx_entry *sw_ring;
1747         struct ixgbe_rx_entry *rxe;
1748         struct rte_mbuf *rxm;
1749         struct rte_mbuf *nmb;
1750         union ixgbe_adv_rx_desc rxd;
1751         uint64_t dma_addr;
1752         uint32_t staterr;
1753         uint32_t pkt_info;
1754         uint16_t pkt_len;
1755         uint16_t rx_id;
1756         uint16_t nb_rx;
1757         uint16_t nb_hold;
1758         uint64_t pkt_flags;
1759         uint64_t vlan_flags;
1760
1761         nb_rx = 0;
1762         nb_hold = 0;
1763         rxq = rx_queue;
1764         rx_id = rxq->rx_tail;
1765         rx_ring = rxq->rx_ring;
1766         sw_ring = rxq->sw_ring;
1767         vlan_flags = rxq->vlan_flags;
1768         while (nb_rx < nb_pkts) {
1769                 /*
1770                  * The order of operations here is important as the DD status
1771                  * bit must not be read after any other descriptor fields.
1772                  * rx_ring and rxdp are pointing to volatile data so the order
1773                  * of accesses cannot be reordered by the compiler. If they were
1774                  * not volatile, they could be reordered which could lead to
1775                  * using invalid descriptor fields when read from rxd.
1776                  */
1777                 rxdp = &rx_ring[rx_id];
1778                 staterr = rxdp->wb.upper.status_error;
1779                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1780                         break;
1781                 rxd = *rxdp;
1782
1783                 /*
1784                  * End of packet.
1785                  *
1786                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1787                  * is likely to be invalid and to be dropped by the various
1788                  * validation checks performed by the network stack.
1789                  *
1790                  * Allocate a new mbuf to replenish the RX ring descriptor.
1791                  * If the allocation fails:
1792                  *    - arrange for that RX descriptor to be the first one
1793                  *      being parsed the next time the receive function is
1794                  *      invoked [on the same queue].
1795                  *
1796                  *    - Stop parsing the RX ring and return immediately.
1797                  *
1798                  * This policy do not drop the packet received in the RX
1799                  * descriptor for which the allocation of a new mbuf failed.
1800                  * Thus, it allows that packet to be later retrieved if
1801                  * mbuf have been freed in the mean time.
1802                  * As a side effect, holding RX descriptors instead of
1803                  * systematically giving them back to the NIC may lead to
1804                  * RX ring exhaustion situations.
1805                  * However, the NIC can gracefully prevent such situations
1806                  * to happen by sending specific "back-pressure" flow control
1807                  * frames to its peer(s).
1808                  */
1809                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1810                            "ext_err_stat=0x%08x pkt_len=%u",
1811                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1812                            (unsigned) rx_id, (unsigned) staterr,
1813                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1814
1815                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1816                 if (nmb == NULL) {
1817                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1818                                    "queue_id=%u", (unsigned) rxq->port_id,
1819                                    (unsigned) rxq->queue_id);
1820                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1821                         break;
1822                 }
1823
1824                 nb_hold++;
1825                 rxe = &sw_ring[rx_id];
1826                 rx_id++;
1827                 if (rx_id == rxq->nb_rx_desc)
1828                         rx_id = 0;
1829
1830                 /* Prefetch next mbuf while processing current one. */
1831                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1832
1833                 /*
1834                  * When next RX descriptor is on a cache-line boundary,
1835                  * prefetch the next 4 RX descriptors and the next 8 pointers
1836                  * to mbufs.
1837                  */
1838                 if ((rx_id & 0x3) == 0) {
1839                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1840                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1841                 }
1842
1843                 rxm = rxe->mbuf;
1844                 rxe->mbuf = nmb;
1845                 dma_addr =
1846                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1847                 rxdp->read.hdr_addr = 0;
1848                 rxdp->read.pkt_addr = dma_addr;
1849
1850                 /*
1851                  * Initialize the returned mbuf.
1852                  * 1) setup generic mbuf fields:
1853                  *    - number of segments,
1854                  *    - next segment,
1855                  *    - packet length,
1856                  *    - RX port identifier.
1857                  * 2) integrate hardware offload data, if any:
1858                  *    - RSS flag & hash,
1859                  *    - IP checksum flag,
1860                  *    - VLAN TCI, if any,
1861                  *    - error flags.
1862                  */
1863                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1864                                       rxq->crc_len);
1865                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1866                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1867                 rxm->nb_segs = 1;
1868                 rxm->next = NULL;
1869                 rxm->pkt_len = pkt_len;
1870                 rxm->data_len = pkt_len;
1871                 rxm->port = rxq->port_id;
1872
1873                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1874                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1875                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1876
1877                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1878                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1879                 pkt_flags = pkt_flags |
1880                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1881                 rxm->ol_flags = pkt_flags;
1882                 rxm->packet_type =
1883                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1884                                                        rxq->pkt_type_mask);
1885
1886                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1887                         rxm->hash.rss = rte_le_to_cpu_32(
1888                                                 rxd.wb.lower.hi_dword.rss);
1889                 else if (pkt_flags & PKT_RX_FDIR) {
1890                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1891                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1892                                         IXGBE_ATR_HASH_MASK;
1893                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1894                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1895                 }
1896                 /*
1897                  * Store the mbuf address into the next entry of the array
1898                  * of returned packets.
1899                  */
1900                 rx_pkts[nb_rx++] = rxm;
1901         }
1902         rxq->rx_tail = rx_id;
1903
1904         /*
1905          * If the number of free RX descriptors is greater than the RX free
1906          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1907          * register.
1908          * Update the RDT with the value of the last processed RX descriptor
1909          * minus 1, to guarantee that the RDT register is never equal to the
1910          * RDH register, which creates a "full" ring situtation from the
1911          * hardware point of view...
1912          */
1913         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1914         if (nb_hold > rxq->rx_free_thresh) {
1915                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1916                            "nb_hold=%u nb_rx=%u",
1917                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1918                            (unsigned) rx_id, (unsigned) nb_hold,
1919                            (unsigned) nb_rx);
1920                 rx_id = (uint16_t) ((rx_id == 0) ?
1921                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1922                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1923                 nb_hold = 0;
1924         }
1925         rxq->nb_rx_hold = nb_hold;
1926         return nb_rx;
1927 }
1928
1929 /**
1930  * Detect an RSC descriptor.
1931  */
1932 static inline uint32_t
1933 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1934 {
1935         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1936                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1937 }
1938
1939 /**
1940  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1941  *
1942  * Fill the following info in the HEAD buffer of the Rx cluster:
1943  *    - RX port identifier
1944  *    - hardware offload data, if any:
1945  *      - RSS flag & hash
1946  *      - IP checksum flag
1947  *      - VLAN TCI, if any
1948  *      - error flags
1949  * @head HEAD of the packet cluster
1950  * @desc HW descriptor to get data from
1951  * @rxq Pointer to the Rx queue
1952  */
1953 static inline void
1954 ixgbe_fill_cluster_head_buf(
1955         struct rte_mbuf *head,
1956         union ixgbe_adv_rx_desc *desc,
1957         struct ixgbe_rx_queue *rxq,
1958         uint32_t staterr)
1959 {
1960         uint32_t pkt_info;
1961         uint64_t pkt_flags;
1962
1963         head->port = rxq->port_id;
1964
1965         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1966          * set in the pkt_flags field.
1967          */
1968         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1969         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1970         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1971         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1972         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1973         head->ol_flags = pkt_flags;
1974         head->packet_type =
1975                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1976
1977         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1978                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1979         else if (pkt_flags & PKT_RX_FDIR) {
1980                 head->hash.fdir.hash =
1981                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1982                                                           & IXGBE_ATR_HASH_MASK;
1983                 head->hash.fdir.id =
1984                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1985         }
1986 }
1987
1988 /**
1989  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1990  *
1991  * @rx_queue Rx queue handle
1992  * @rx_pkts table of received packets
1993  * @nb_pkts size of rx_pkts table
1994  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1995  *
1996  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1997  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1998  *
1999  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2000  * 1) When non-EOP RSC completion arrives:
2001  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2002  *       segment's data length.
2003  *    b) Set the "next" pointer of the current segment to point to the segment
2004  *       at the NEXTP index.
2005  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2006  *       in the sw_rsc_ring.
2007  * 2) When EOP arrives we just update the cluster's total length and offload
2008  *    flags and deliver the cluster up to the upper layers. In our case - put it
2009  *    in the rx_pkts table.
2010  *
2011  * Returns the number of received packets/clusters (according to the "bulk
2012  * receive" interface).
2013  */
2014 static inline uint16_t
2015 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2016                     bool bulk_alloc)
2017 {
2018         struct ixgbe_rx_queue *rxq = rx_queue;
2019         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2020         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2021         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2022         uint16_t rx_id = rxq->rx_tail;
2023         uint16_t nb_rx = 0;
2024         uint16_t nb_hold = rxq->nb_rx_hold;
2025         uint16_t prev_id = rxq->rx_tail;
2026
2027         while (nb_rx < nb_pkts) {
2028                 bool eop;
2029                 struct ixgbe_rx_entry *rxe;
2030                 struct ixgbe_scattered_rx_entry *sc_entry;
2031                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2032                 struct ixgbe_rx_entry *next_rxe = NULL;
2033                 struct rte_mbuf *first_seg;
2034                 struct rte_mbuf *rxm;
2035                 struct rte_mbuf *nmb = NULL;
2036                 union ixgbe_adv_rx_desc rxd;
2037                 uint16_t data_len;
2038                 uint16_t next_id;
2039                 volatile union ixgbe_adv_rx_desc *rxdp;
2040                 uint32_t staterr;
2041
2042 next_desc:
2043                 /*
2044                  * The code in this whole file uses the volatile pointer to
2045                  * ensure the read ordering of the status and the rest of the
2046                  * descriptor fields (on the compiler level only!!!). This is so
2047                  * UGLY - why not to just use the compiler barrier instead? DPDK
2048                  * even has the rte_compiler_barrier() for that.
2049                  *
2050                  * But most importantly this is just wrong because this doesn't
2051                  * ensure memory ordering in a general case at all. For
2052                  * instance, DPDK is supposed to work on Power CPUs where
2053                  * compiler barrier may just not be enough!
2054                  *
2055                  * I tried to write only this function properly to have a
2056                  * starting point (as a part of an LRO/RSC series) but the
2057                  * compiler cursed at me when I tried to cast away the
2058                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2059                  * keeping it the way it is for now.
2060                  *
2061                  * The code in this file is broken in so many other places and
2062                  * will just not work on a big endian CPU anyway therefore the
2063                  * lines below will have to be revisited together with the rest
2064                  * of the ixgbe PMD.
2065                  *
2066                  * TODO:
2067                  *    - Get rid of "volatile" and let the compiler do its job.
2068                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2069                  *      memory ordering below.
2070                  */
2071                 rxdp = &rx_ring[rx_id];
2072                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2073
2074                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2075                         break;
2076
2077                 rxd = *rxdp;
2078
2079                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2080                                   "staterr=0x%x data_len=%u",
2081                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2082                            rte_le_to_cpu_16(rxd.wb.upper.length));
2083
2084                 if (!bulk_alloc) {
2085                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2086                         if (nmb == NULL) {
2087                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2088                                                   "port_id=%u queue_id=%u",
2089                                            rxq->port_id, rxq->queue_id);
2090
2091                                 rte_eth_devices[rxq->port_id].data->
2092                                                         rx_mbuf_alloc_failed++;
2093                                 break;
2094                         }
2095                 } else if (nb_hold > rxq->rx_free_thresh) {
2096                         uint16_t next_rdt = rxq->rx_free_trigger;
2097
2098                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2099                                 rte_wmb();
2100                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2101                                                             next_rdt);
2102                                 nb_hold -= rxq->rx_free_thresh;
2103                         } else {
2104                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2105                                                   "port_id=%u queue_id=%u",
2106                                            rxq->port_id, rxq->queue_id);
2107
2108                                 rte_eth_devices[rxq->port_id].data->
2109                                                         rx_mbuf_alloc_failed++;
2110                                 break;
2111                         }
2112                 }
2113
2114                 nb_hold++;
2115                 rxe = &sw_ring[rx_id];
2116                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2117
2118                 next_id = rx_id + 1;
2119                 if (next_id == rxq->nb_rx_desc)
2120                         next_id = 0;
2121
2122                 /* Prefetch next mbuf while processing current one. */
2123                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2124
2125                 /*
2126                  * When next RX descriptor is on a cache-line boundary,
2127                  * prefetch the next 4 RX descriptors and the next 4 pointers
2128                  * to mbufs.
2129                  */
2130                 if ((next_id & 0x3) == 0) {
2131                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2132                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2133                 }
2134
2135                 rxm = rxe->mbuf;
2136
2137                 if (!bulk_alloc) {
2138                         __le64 dma =
2139                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2140                         /*
2141                          * Update RX descriptor with the physical address of the
2142                          * new data buffer of the new allocated mbuf.
2143                          */
2144                         rxe->mbuf = nmb;
2145
2146                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2147                         rxdp->read.hdr_addr = 0;
2148                         rxdp->read.pkt_addr = dma;
2149                 } else
2150                         rxe->mbuf = NULL;
2151
2152                 /*
2153                  * Set data length & data buffer address of mbuf.
2154                  */
2155                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2156                 rxm->data_len = data_len;
2157
2158                 if (!eop) {
2159                         uint16_t nextp_id;
2160                         /*
2161                          * Get next descriptor index:
2162                          *  - For RSC it's in the NEXTP field.
2163                          *  - For a scattered packet - it's just a following
2164                          *    descriptor.
2165                          */
2166                         if (ixgbe_rsc_count(&rxd))
2167                                 nextp_id =
2168                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2169                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2170                         else
2171                                 nextp_id = next_id;
2172
2173                         next_sc_entry = &sw_sc_ring[nextp_id];
2174                         next_rxe = &sw_ring[nextp_id];
2175                         rte_ixgbe_prefetch(next_rxe);
2176                 }
2177
2178                 sc_entry = &sw_sc_ring[rx_id];
2179                 first_seg = sc_entry->fbuf;
2180                 sc_entry->fbuf = NULL;
2181
2182                 /*
2183                  * If this is the first buffer of the received packet,
2184                  * set the pointer to the first mbuf of the packet and
2185                  * initialize its context.
2186                  * Otherwise, update the total length and the number of segments
2187                  * of the current scattered packet, and update the pointer to
2188                  * the last mbuf of the current packet.
2189                  */
2190                 if (first_seg == NULL) {
2191                         first_seg = rxm;
2192                         first_seg->pkt_len = data_len;
2193                         first_seg->nb_segs = 1;
2194                 } else {
2195                         first_seg->pkt_len += data_len;
2196                         first_seg->nb_segs++;
2197                 }
2198
2199                 prev_id = rx_id;
2200                 rx_id = next_id;
2201
2202                 /*
2203                  * If this is not the last buffer of the received packet, update
2204                  * the pointer to the first mbuf at the NEXTP entry in the
2205                  * sw_sc_ring and continue to parse the RX ring.
2206                  */
2207                 if (!eop && next_rxe) {
2208                         rxm->next = next_rxe->mbuf;
2209                         next_sc_entry->fbuf = first_seg;
2210                         goto next_desc;
2211                 }
2212
2213                 /* Initialize the first mbuf of the returned packet */
2214                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2215
2216                 /*
2217                  * Deal with the case, when HW CRC srip is disabled.
2218                  * That can't happen when LRO is enabled, but still could
2219                  * happen for scattered RX mode.
2220                  */
2221                 first_seg->pkt_len -= rxq->crc_len;
2222                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2223                         struct rte_mbuf *lp;
2224
2225                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2226                                 ;
2227
2228                         first_seg->nb_segs--;
2229                         lp->data_len -= rxq->crc_len - rxm->data_len;
2230                         lp->next = NULL;
2231                         rte_pktmbuf_free_seg(rxm);
2232                 } else
2233                         rxm->data_len -= rxq->crc_len;
2234
2235                 /* Prefetch data of first segment, if configured to do so. */
2236                 rte_packet_prefetch((char *)first_seg->buf_addr +
2237                         first_seg->data_off);
2238
2239                 /*
2240                  * Store the mbuf address into the next entry of the array
2241                  * of returned packets.
2242                  */
2243                 rx_pkts[nb_rx++] = first_seg;
2244         }
2245
2246         /*
2247          * Record index of the next RX descriptor to probe.
2248          */
2249         rxq->rx_tail = rx_id;
2250
2251         /*
2252          * If the number of free RX descriptors is greater than the RX free
2253          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2254          * register.
2255          * Update the RDT with the value of the last processed RX descriptor
2256          * minus 1, to guarantee that the RDT register is never equal to the
2257          * RDH register, which creates a "full" ring situtation from the
2258          * hardware point of view...
2259          */
2260         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2261                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2262                            "nb_hold=%u nb_rx=%u",
2263                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2264
2265                 rte_wmb();
2266                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2267                 nb_hold = 0;
2268         }
2269
2270         rxq->nb_rx_hold = nb_hold;
2271         return nb_rx;
2272 }
2273
2274 uint16_t
2275 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2276                                  uint16_t nb_pkts)
2277 {
2278         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2279 }
2280
2281 uint16_t
2282 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2283                                uint16_t nb_pkts)
2284 {
2285         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2286 }
2287
2288 /*********************************************************************
2289  *
2290  *  Queue management functions
2291  *
2292  **********************************************************************/
2293
2294 static void __attribute__((cold))
2295 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2296 {
2297         unsigned i;
2298
2299         if (txq->sw_ring != NULL) {
2300                 for (i = 0; i < txq->nb_tx_desc; i++) {
2301                         if (txq->sw_ring[i].mbuf != NULL) {
2302                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2303                                 txq->sw_ring[i].mbuf = NULL;
2304                         }
2305                 }
2306         }
2307 }
2308
2309 static int
2310 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2311 {
2312         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2313         uint16_t i, tx_last, tx_id;
2314         uint16_t nb_tx_free_last;
2315         uint16_t nb_tx_to_clean;
2316         uint32_t pkt_cnt;
2317
2318         /* Start free mbuf from the next of tx_tail */
2319         tx_last = txq->tx_tail;
2320         tx_id  = swr_ring[tx_last].next_id;
2321
2322         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2323                 return 0;
2324
2325         nb_tx_to_clean = txq->nb_tx_free;
2326         nb_tx_free_last = txq->nb_tx_free;
2327         if (!free_cnt)
2328                 free_cnt = txq->nb_tx_desc;
2329
2330         /* Loop through swr_ring to count the amount of
2331          * freeable mubfs and packets.
2332          */
2333         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2334                 for (i = 0; i < nb_tx_to_clean &&
2335                         pkt_cnt < free_cnt &&
2336                         tx_id != tx_last; i++) {
2337                         if (swr_ring[tx_id].mbuf != NULL) {
2338                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2339                                 swr_ring[tx_id].mbuf = NULL;
2340
2341                                 /*
2342                                  * last segment in the packet,
2343                                  * increment packet count
2344                                  */
2345                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2346                         }
2347
2348                         tx_id = swr_ring[tx_id].next_id;
2349                 }
2350
2351                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2352                         txq->nb_tx_free || tx_id == tx_last)
2353                         break;
2354
2355                 if (pkt_cnt < free_cnt) {
2356                         if (ixgbe_xmit_cleanup(txq))
2357                                 break;
2358
2359                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2360                         nb_tx_free_last = txq->nb_tx_free;
2361                 }
2362         }
2363
2364         return (int)pkt_cnt;
2365 }
2366
2367 static int
2368 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2369                         uint32_t free_cnt)
2370 {
2371         int i, n, cnt;
2372
2373         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2374                 free_cnt = txq->nb_tx_desc;
2375
2376         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2377
2378         for (i = 0; i < cnt; i += n) {
2379                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2380                         break;
2381
2382                 n = ixgbe_tx_free_bufs(txq);
2383
2384                 if (n == 0)
2385                         break;
2386         }
2387
2388         return i;
2389 }
2390
2391 static int
2392 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2393                         uint32_t free_cnt __rte_unused)
2394 {
2395         return -ENOTSUP;
2396 }
2397
2398 int
2399 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2400 {
2401         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2402         if (txq->offloads == 0 &&
2403 #ifdef RTE_LIBRTE_SECURITY
2404                         !(txq->using_ipsec) &&
2405 #endif
2406                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2407                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2408                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2409                                         txq->sw_ring_v != NULL)) {
2410                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2411                 } else {
2412                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2413                 }
2414         }
2415
2416         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2417 }
2418
2419 static void __attribute__((cold))
2420 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2421 {
2422         if (txq != NULL &&
2423             txq->sw_ring != NULL)
2424                 rte_free(txq->sw_ring);
2425 }
2426
2427 static void __attribute__((cold))
2428 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2429 {
2430         if (txq != NULL && txq->ops != NULL) {
2431                 txq->ops->release_mbufs(txq);
2432                 txq->ops->free_swring(txq);
2433                 rte_free(txq);
2434         }
2435 }
2436
2437 void __attribute__((cold))
2438 ixgbe_dev_tx_queue_release(void *txq)
2439 {
2440         ixgbe_tx_queue_release(txq);
2441 }
2442
2443 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2444 static void __attribute__((cold))
2445 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2446 {
2447         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2448         struct ixgbe_tx_entry *txe = txq->sw_ring;
2449         uint16_t prev, i;
2450
2451         /* Zero out HW ring memory */
2452         for (i = 0; i < txq->nb_tx_desc; i++) {
2453                 txq->tx_ring[i] = zeroed_desc;
2454         }
2455
2456         /* Initialize SW ring entries */
2457         prev = (uint16_t) (txq->nb_tx_desc - 1);
2458         for (i = 0; i < txq->nb_tx_desc; i++) {
2459                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2460
2461                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2462                 txe[i].mbuf = NULL;
2463                 txe[i].last_id = i;
2464                 txe[prev].next_id = i;
2465                 prev = i;
2466         }
2467
2468         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2469         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2470
2471         txq->tx_tail = 0;
2472         txq->nb_tx_used = 0;
2473         /*
2474          * Always allow 1 descriptor to be un-allocated to avoid
2475          * a H/W race condition
2476          */
2477         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2478         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2479         txq->ctx_curr = 0;
2480         memset((void *)&txq->ctx_cache, 0,
2481                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2482 }
2483
2484 static const struct ixgbe_txq_ops def_txq_ops = {
2485         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2486         .free_swring = ixgbe_tx_free_swring,
2487         .reset = ixgbe_reset_tx_queue,
2488 };
2489
2490 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2491  * the queue parameters. Used in tx_queue_setup by primary process and then
2492  * in dev_init by secondary process when attaching to an existing ethdev.
2493  */
2494 void __attribute__((cold))
2495 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2496 {
2497         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2498         if ((txq->offloads == 0) &&
2499 #ifdef RTE_LIBRTE_SECURITY
2500                         !(txq->using_ipsec) &&
2501 #endif
2502                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2503                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2504                 dev->tx_pkt_prepare = NULL;
2505 #ifdef RTE_IXGBE_INC_VECTOR
2506                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2507                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2508                                         ixgbe_txq_vec_setup(txq) == 0)) {
2509                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2510                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2511                 } else
2512 #endif
2513                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2514         } else {
2515                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2516                 PMD_INIT_LOG(DEBUG,
2517                                 " - offloads = 0x%" PRIx64,
2518                                 txq->offloads);
2519                 PMD_INIT_LOG(DEBUG,
2520                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2521                                 (unsigned long)txq->tx_rs_thresh,
2522                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2523                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2524                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2525         }
2526 }
2527
2528 uint64_t
2529 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2530 {
2531         RTE_SET_USED(dev);
2532
2533         return 0;
2534 }
2535
2536 uint64_t
2537 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2538 {
2539         uint64_t tx_offload_capa;
2540         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2541
2542         tx_offload_capa =
2543                 DEV_TX_OFFLOAD_VLAN_INSERT |
2544                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2545                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2546                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2547                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2548                 DEV_TX_OFFLOAD_TCP_TSO     |
2549                 DEV_TX_OFFLOAD_MULTI_SEGS;
2550
2551         if (hw->mac.type == ixgbe_mac_82599EB ||
2552             hw->mac.type == ixgbe_mac_X540)
2553                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2554
2555         if (hw->mac.type == ixgbe_mac_X550 ||
2556             hw->mac.type == ixgbe_mac_X550EM_x ||
2557             hw->mac.type == ixgbe_mac_X550EM_a)
2558                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2559
2560 #ifdef RTE_LIBRTE_SECURITY
2561         if (dev->security_ctx)
2562                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2563 #endif
2564         return tx_offload_capa;
2565 }
2566
2567 int __attribute__((cold))
2568 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2569                          uint16_t queue_idx,
2570                          uint16_t nb_desc,
2571                          unsigned int socket_id,
2572                          const struct rte_eth_txconf *tx_conf)
2573 {
2574         const struct rte_memzone *tz;
2575         struct ixgbe_tx_queue *txq;
2576         struct ixgbe_hw     *hw;
2577         uint16_t tx_rs_thresh, tx_free_thresh;
2578         uint64_t offloads;
2579
2580         PMD_INIT_FUNC_TRACE();
2581         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2582
2583         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2584
2585         /*
2586          * Validate number of transmit descriptors.
2587          * It must not exceed hardware maximum, and must be multiple
2588          * of IXGBE_ALIGN.
2589          */
2590         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2591                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2592                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2593                 return -EINVAL;
2594         }
2595
2596         /*
2597          * The following two parameters control the setting of the RS bit on
2598          * transmit descriptors.
2599          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2600          * descriptors have been used.
2601          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2602          * descriptors are used or if the number of descriptors required
2603          * to transmit a packet is greater than the number of free TX
2604          * descriptors.
2605          * The following constraints must be satisfied:
2606          *  tx_rs_thresh must be greater than 0.
2607          *  tx_rs_thresh must be less than the size of the ring minus 2.
2608          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2609          *  tx_rs_thresh must be a divisor of the ring size.
2610          *  tx_free_thresh must be greater than 0.
2611          *  tx_free_thresh must be less than the size of the ring minus 3.
2612          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2613          * One descriptor in the TX ring is used as a sentinel to avoid a
2614          * H/W race condition, hence the maximum threshold constraints.
2615          * When set to zero use default values.
2616          */
2617         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2618                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2619         /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */
2620         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2621                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2622         if (tx_conf->tx_rs_thresh > 0)
2623                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2624         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2625                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2626                              "exceed nb_desc. (tx_rs_thresh=%u "
2627                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2628                              (unsigned int)tx_rs_thresh,
2629                              (unsigned int)tx_free_thresh,
2630                              (unsigned int)nb_desc,
2631                              (int)dev->data->port_id,
2632                              (int)queue_idx);
2633                 return -(EINVAL);
2634         }
2635         if (tx_rs_thresh >= (nb_desc - 2)) {
2636                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2637                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2638                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2639                         (int)dev->data->port_id, (int)queue_idx);
2640                 return -(EINVAL);
2641         }
2642         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2643                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2644                         "(tx_rs_thresh=%u port=%d queue=%d)",
2645                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2646                         (int)dev->data->port_id, (int)queue_idx);
2647                 return -(EINVAL);
2648         }
2649         if (tx_free_thresh >= (nb_desc - 3)) {
2650                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2651                              "tx_free_thresh must be less than the number of "
2652                              "TX descriptors minus 3. (tx_free_thresh=%u "
2653                              "port=%d queue=%d)",
2654                              (unsigned int)tx_free_thresh,
2655                              (int)dev->data->port_id, (int)queue_idx);
2656                 return -(EINVAL);
2657         }
2658         if (tx_rs_thresh > tx_free_thresh) {
2659                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2660                              "tx_free_thresh. (tx_free_thresh=%u "
2661                              "tx_rs_thresh=%u port=%d queue=%d)",
2662                              (unsigned int)tx_free_thresh,
2663                              (unsigned int)tx_rs_thresh,
2664                              (int)dev->data->port_id,
2665                              (int)queue_idx);
2666                 return -(EINVAL);
2667         }
2668         if ((nb_desc % tx_rs_thresh) != 0) {
2669                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2670                              "number of TX descriptors. (tx_rs_thresh=%u "
2671                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2672                              (int)dev->data->port_id, (int)queue_idx);
2673                 return -(EINVAL);
2674         }
2675
2676         /*
2677          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2678          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2679          * by the NIC and all descriptors are written back after the NIC
2680          * accumulates WTHRESH descriptors.
2681          */
2682         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2683                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2684                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2685                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2686                              (int)dev->data->port_id, (int)queue_idx);
2687                 return -(EINVAL);
2688         }
2689
2690         /* Free memory prior to re-allocation if needed... */
2691         if (dev->data->tx_queues[queue_idx] != NULL) {
2692                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2693                 dev->data->tx_queues[queue_idx] = NULL;
2694         }
2695
2696         /* First allocate the tx queue data structure */
2697         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2698                                  RTE_CACHE_LINE_SIZE, socket_id);
2699         if (txq == NULL)
2700                 return -ENOMEM;
2701
2702         /*
2703          * Allocate TX ring hardware descriptors. A memzone large enough to
2704          * handle the maximum ring size is allocated in order to allow for
2705          * resizing in later calls to the queue setup function.
2706          */
2707         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2708                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2709                         IXGBE_ALIGN, socket_id);
2710         if (tz == NULL) {
2711                 ixgbe_tx_queue_release(txq);
2712                 return -ENOMEM;
2713         }
2714
2715         txq->nb_tx_desc = nb_desc;
2716         txq->tx_rs_thresh = tx_rs_thresh;
2717         txq->tx_free_thresh = tx_free_thresh;
2718         txq->pthresh = tx_conf->tx_thresh.pthresh;
2719         txq->hthresh = tx_conf->tx_thresh.hthresh;
2720         txq->wthresh = tx_conf->tx_thresh.wthresh;
2721         txq->queue_id = queue_idx;
2722         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2723                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2724         txq->port_id = dev->data->port_id;
2725         txq->offloads = offloads;
2726         txq->ops = &def_txq_ops;
2727         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2728 #ifdef RTE_LIBRTE_SECURITY
2729         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2730                         DEV_TX_OFFLOAD_SECURITY);
2731 #endif
2732
2733         /*
2734          * Modification to set VFTDT for virtual function if vf is detected
2735          */
2736         if (hw->mac.type == ixgbe_mac_82599_vf ||
2737             hw->mac.type == ixgbe_mac_X540_vf ||
2738             hw->mac.type == ixgbe_mac_X550_vf ||
2739             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2740             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2741                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2742         else
2743                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2744
2745         txq->tx_ring_phys_addr = tz->iova;
2746         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2747
2748         /* Allocate software ring */
2749         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2750                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2751                                 RTE_CACHE_LINE_SIZE, socket_id);
2752         if (txq->sw_ring == NULL) {
2753                 ixgbe_tx_queue_release(txq);
2754                 return -ENOMEM;
2755         }
2756         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2757                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2758
2759         /* set up vector or scalar TX function as appropriate */
2760         ixgbe_set_tx_function(dev, txq);
2761
2762         txq->ops->reset(txq);
2763
2764         dev->data->tx_queues[queue_idx] = txq;
2765
2766
2767         return 0;
2768 }
2769
2770 /**
2771  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2772  *
2773  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2774  * in the sw_rsc_ring is not set to NULL but rather points to the next
2775  * mbuf of this RSC aggregation (that has not been completed yet and still
2776  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2777  * will just free first "nb_segs" segments of the cluster explicitly by calling
2778  * an rte_pktmbuf_free_seg().
2779  *
2780  * @m scattered cluster head
2781  */
2782 static void __attribute__((cold))
2783 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2784 {
2785         uint16_t i, nb_segs = m->nb_segs;
2786         struct rte_mbuf *next_seg;
2787
2788         for (i = 0; i < nb_segs; i++) {
2789                 next_seg = m->next;
2790                 rte_pktmbuf_free_seg(m);
2791                 m = next_seg;
2792         }
2793 }
2794
2795 static void __attribute__((cold))
2796 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2797 {
2798         unsigned i;
2799
2800 #ifdef RTE_IXGBE_INC_VECTOR
2801         /* SSE Vector driver has a different way of releasing mbufs. */
2802         if (rxq->rx_using_sse) {
2803                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2804                 return;
2805         }
2806 #endif
2807
2808         if (rxq->sw_ring != NULL) {
2809                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2810                         if (rxq->sw_ring[i].mbuf != NULL) {
2811                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2812                                 rxq->sw_ring[i].mbuf = NULL;
2813                         }
2814                 }
2815                 if (rxq->rx_nb_avail) {
2816                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2817                                 struct rte_mbuf *mb;
2818
2819                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2820                                 rte_pktmbuf_free_seg(mb);
2821                         }
2822                         rxq->rx_nb_avail = 0;
2823                 }
2824         }
2825
2826         if (rxq->sw_sc_ring)
2827                 for (i = 0; i < rxq->nb_rx_desc; i++)
2828                         if (rxq->sw_sc_ring[i].fbuf) {
2829                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2830                                 rxq->sw_sc_ring[i].fbuf = NULL;
2831                         }
2832 }
2833
2834 static void __attribute__((cold))
2835 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2836 {
2837         if (rxq != NULL) {
2838                 ixgbe_rx_queue_release_mbufs(rxq);
2839                 rte_free(rxq->sw_ring);
2840                 rte_free(rxq->sw_sc_ring);
2841                 rte_free(rxq);
2842         }
2843 }
2844
2845 void __attribute__((cold))
2846 ixgbe_dev_rx_queue_release(void *rxq)
2847 {
2848         ixgbe_rx_queue_release(rxq);
2849 }
2850
2851 /*
2852  * Check if Rx Burst Bulk Alloc function can be used.
2853  * Return
2854  *        0: the preconditions are satisfied and the bulk allocation function
2855  *           can be used.
2856  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2857  *           function must be used.
2858  */
2859 static inline int __attribute__((cold))
2860 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2861 {
2862         int ret = 0;
2863
2864         /*
2865          * Make sure the following pre-conditions are satisfied:
2866          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2867          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2868          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2869          * Scattered packets are not supported.  This should be checked
2870          * outside of this function.
2871          */
2872         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2873                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2874                              "rxq->rx_free_thresh=%d, "
2875                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2876                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2877                 ret = -EINVAL;
2878         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2879                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2880                              "rxq->rx_free_thresh=%d, "
2881                              "rxq->nb_rx_desc=%d",
2882                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2883                 ret = -EINVAL;
2884         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2885                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2886                              "rxq->nb_rx_desc=%d, "
2887                              "rxq->rx_free_thresh=%d",
2888                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2889                 ret = -EINVAL;
2890         }
2891
2892         return ret;
2893 }
2894
2895 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2896 static void __attribute__((cold))
2897 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2898 {
2899         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2900         unsigned i;
2901         uint16_t len = rxq->nb_rx_desc;
2902
2903         /*
2904          * By default, the Rx queue setup function allocates enough memory for
2905          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2906          * extra memory at the end of the descriptor ring to be zero'd out.
2907          */
2908         if (adapter->rx_bulk_alloc_allowed)
2909                 /* zero out extra memory */
2910                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2911
2912         /*
2913          * Zero out HW ring memory. Zero out extra memory at the end of
2914          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2915          * reads extra memory as zeros.
2916          */
2917         for (i = 0; i < len; i++) {
2918                 rxq->rx_ring[i] = zeroed_desc;
2919         }
2920
2921         /*
2922          * initialize extra software ring entries. Space for these extra
2923          * entries is always allocated
2924          */
2925         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2926         for (i = rxq->nb_rx_desc; i < len; ++i) {
2927                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2928         }
2929
2930         rxq->rx_nb_avail = 0;
2931         rxq->rx_next_avail = 0;
2932         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2933         rxq->rx_tail = 0;
2934         rxq->nb_rx_hold = 0;
2935         rxq->pkt_first_seg = NULL;
2936         rxq->pkt_last_seg = NULL;
2937
2938 #ifdef RTE_IXGBE_INC_VECTOR
2939         rxq->rxrearm_start = 0;
2940         rxq->rxrearm_nb = 0;
2941 #endif
2942 }
2943
2944 static int
2945 ixgbe_is_vf(struct rte_eth_dev *dev)
2946 {
2947         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2948
2949         switch (hw->mac.type) {
2950         case ixgbe_mac_82599_vf:
2951         case ixgbe_mac_X540_vf:
2952         case ixgbe_mac_X550_vf:
2953         case ixgbe_mac_X550EM_x_vf:
2954         case ixgbe_mac_X550EM_a_vf:
2955                 return 1;
2956         default:
2957                 return 0;
2958         }
2959 }
2960
2961 uint64_t
2962 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2963 {
2964         uint64_t offloads = 0;
2965         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2966
2967         if (hw->mac.type != ixgbe_mac_82598EB)
2968                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2969
2970         return offloads;
2971 }
2972
2973 uint64_t
2974 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2975 {
2976         uint64_t offloads;
2977         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2978
2979         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2980                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2981                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2982                    DEV_RX_OFFLOAD_KEEP_CRC    |
2983                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2984                    DEV_RX_OFFLOAD_VLAN_FILTER |
2985                    DEV_RX_OFFLOAD_SCATTER |
2986                    DEV_RX_OFFLOAD_RSS_HASH;
2987
2988         if (hw->mac.type == ixgbe_mac_82598EB)
2989                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2990
2991         if (ixgbe_is_vf(dev) == 0)
2992                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2993
2994         /*
2995          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2996          * mode.
2997          */
2998         if ((hw->mac.type == ixgbe_mac_82599EB ||
2999              hw->mac.type == ixgbe_mac_X540 ||
3000              hw->mac.type == ixgbe_mac_X550) &&
3001             !RTE_ETH_DEV_SRIOV(dev).active)
3002                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3003
3004         if (hw->mac.type == ixgbe_mac_82599EB ||
3005             hw->mac.type == ixgbe_mac_X540)
3006                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3007
3008         if (hw->mac.type == ixgbe_mac_X550 ||
3009             hw->mac.type == ixgbe_mac_X550EM_x ||
3010             hw->mac.type == ixgbe_mac_X550EM_a)
3011                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3012
3013 #ifdef RTE_LIBRTE_SECURITY
3014         if (dev->security_ctx)
3015                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3016 #endif
3017
3018         return offloads;
3019 }
3020
3021 int __attribute__((cold))
3022 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3023                          uint16_t queue_idx,
3024                          uint16_t nb_desc,
3025                          unsigned int socket_id,
3026                          const struct rte_eth_rxconf *rx_conf,
3027                          struct rte_mempool *mp)
3028 {
3029         const struct rte_memzone *rz;
3030         struct ixgbe_rx_queue *rxq;
3031         struct ixgbe_hw     *hw;
3032         uint16_t len;
3033         struct ixgbe_adapter *adapter = dev->data->dev_private;
3034         uint64_t offloads;
3035
3036         PMD_INIT_FUNC_TRACE();
3037         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3038
3039         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3040
3041         /*
3042          * Validate number of receive descriptors.
3043          * It must not exceed hardware maximum, and must be multiple
3044          * of IXGBE_ALIGN.
3045          */
3046         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3047                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3048                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3049                 return -EINVAL;
3050         }
3051
3052         /* Free memory prior to re-allocation if needed... */
3053         if (dev->data->rx_queues[queue_idx] != NULL) {
3054                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3055                 dev->data->rx_queues[queue_idx] = NULL;
3056         }
3057
3058         /* First allocate the rx queue data structure */
3059         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3060                                  RTE_CACHE_LINE_SIZE, socket_id);
3061         if (rxq == NULL)
3062                 return -ENOMEM;
3063         rxq->mb_pool = mp;
3064         rxq->nb_rx_desc = nb_desc;
3065         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3066         rxq->queue_id = queue_idx;
3067         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3068                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3069         rxq->port_id = dev->data->port_id;
3070         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3071                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3072         else
3073                 rxq->crc_len = 0;
3074         rxq->drop_en = rx_conf->rx_drop_en;
3075         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3076         rxq->offloads = offloads;
3077
3078         /*
3079          * The packet type in RX descriptor is different for different NICs.
3080          * Some bits are used for x550 but reserved for other NICS.
3081          * So set different masks for different NICs.
3082          */
3083         if (hw->mac.type == ixgbe_mac_X550 ||
3084             hw->mac.type == ixgbe_mac_X550EM_x ||
3085             hw->mac.type == ixgbe_mac_X550EM_a ||
3086             hw->mac.type == ixgbe_mac_X550_vf ||
3087             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3088             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3089                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3090         else
3091                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3092
3093         /*
3094          * Allocate RX ring hardware descriptors. A memzone large enough to
3095          * handle the maximum ring size is allocated in order to allow for
3096          * resizing in later calls to the queue setup function.
3097          */
3098         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3099                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3100         if (rz == NULL) {
3101                 ixgbe_rx_queue_release(rxq);
3102                 return -ENOMEM;
3103         }
3104
3105         /*
3106          * Zero init all the descriptors in the ring.
3107          */
3108         memset(rz->addr, 0, RX_RING_SZ);
3109
3110         /*
3111          * Modified to setup VFRDT for Virtual Function
3112          */
3113         if (hw->mac.type == ixgbe_mac_82599_vf ||
3114             hw->mac.type == ixgbe_mac_X540_vf ||
3115             hw->mac.type == ixgbe_mac_X550_vf ||
3116             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3117             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3118                 rxq->rdt_reg_addr =
3119                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3120                 rxq->rdh_reg_addr =
3121                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3122         } else {
3123                 rxq->rdt_reg_addr =
3124                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3125                 rxq->rdh_reg_addr =
3126                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3127         }
3128
3129         rxq->rx_ring_phys_addr = rz->iova;
3130         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3131
3132         /*
3133          * Certain constraints must be met in order to use the bulk buffer
3134          * allocation Rx burst function. If any of Rx queues doesn't meet them
3135          * the feature should be disabled for the whole port.
3136          */
3137         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3138                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3139                                     "preconditions - canceling the feature for "
3140                                     "the whole port[%d]",
3141                              rxq->queue_id, rxq->port_id);
3142                 adapter->rx_bulk_alloc_allowed = false;
3143         }
3144
3145         /*
3146          * Allocate software ring. Allow for space at the end of the
3147          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3148          * function does not access an invalid memory region.
3149          */
3150         len = nb_desc;
3151         if (adapter->rx_bulk_alloc_allowed)
3152                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3153
3154         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3155                                           sizeof(struct ixgbe_rx_entry) * len,
3156                                           RTE_CACHE_LINE_SIZE, socket_id);
3157         if (!rxq->sw_ring) {
3158                 ixgbe_rx_queue_release(rxq);
3159                 return -ENOMEM;
3160         }
3161
3162         /*
3163          * Always allocate even if it's not going to be needed in order to
3164          * simplify the code.
3165          *
3166          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3167          * be requested in ixgbe_dev_rx_init(), which is called later from
3168          * dev_start() flow.
3169          */
3170         rxq->sw_sc_ring =
3171                 rte_zmalloc_socket("rxq->sw_sc_ring",
3172                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3173                                    RTE_CACHE_LINE_SIZE, socket_id);
3174         if (!rxq->sw_sc_ring) {
3175                 ixgbe_rx_queue_release(rxq);
3176                 return -ENOMEM;
3177         }
3178
3179         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3180                             "dma_addr=0x%"PRIx64,
3181                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3182                      rxq->rx_ring_phys_addr);
3183
3184         if (!rte_is_power_of_2(nb_desc)) {
3185                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3186                                     "preconditions - canceling the feature for "
3187                                     "the whole port[%d]",
3188                              rxq->queue_id, rxq->port_id);
3189                 adapter->rx_vec_allowed = false;
3190         } else
3191                 ixgbe_rxq_vec_setup(rxq);
3192
3193         dev->data->rx_queues[queue_idx] = rxq;
3194
3195         ixgbe_reset_rx_queue(adapter, rxq);
3196
3197         return 0;
3198 }
3199
3200 uint32_t
3201 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3202 {
3203 #define IXGBE_RXQ_SCAN_INTERVAL 4
3204         volatile union ixgbe_adv_rx_desc *rxdp;
3205         struct ixgbe_rx_queue *rxq;
3206         uint32_t desc = 0;
3207
3208         rxq = dev->data->rx_queues[rx_queue_id];
3209         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3210
3211         while ((desc < rxq->nb_rx_desc) &&
3212                 (rxdp->wb.upper.status_error &
3213                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3214                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3215                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3216                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3217                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3218                                 desc - rxq->nb_rx_desc]);
3219         }
3220
3221         return desc;
3222 }
3223
3224 int
3225 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3226 {
3227         volatile union ixgbe_adv_rx_desc *rxdp;
3228         struct ixgbe_rx_queue *rxq = rx_queue;
3229         uint32_t desc;
3230
3231         if (unlikely(offset >= rxq->nb_rx_desc))
3232                 return 0;
3233         desc = rxq->rx_tail + offset;
3234         if (desc >= rxq->nb_rx_desc)
3235                 desc -= rxq->nb_rx_desc;
3236
3237         rxdp = &rxq->rx_ring[desc];
3238         return !!(rxdp->wb.upper.status_error &
3239                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3240 }
3241
3242 int
3243 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3244 {
3245         struct ixgbe_rx_queue *rxq = rx_queue;
3246         volatile uint32_t *status;
3247         uint32_t nb_hold, desc;
3248
3249         if (unlikely(offset >= rxq->nb_rx_desc))
3250                 return -EINVAL;
3251
3252 #ifdef RTE_IXGBE_INC_VECTOR
3253         if (rxq->rx_using_sse)
3254                 nb_hold = rxq->rxrearm_nb;
3255         else
3256 #endif
3257                 nb_hold = rxq->nb_rx_hold;
3258         if (offset >= rxq->nb_rx_desc - nb_hold)
3259                 return RTE_ETH_RX_DESC_UNAVAIL;
3260
3261         desc = rxq->rx_tail + offset;
3262         if (desc >= rxq->nb_rx_desc)
3263                 desc -= rxq->nb_rx_desc;
3264
3265         status = &rxq->rx_ring[desc].wb.upper.status_error;
3266         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3267                 return RTE_ETH_RX_DESC_DONE;
3268
3269         return RTE_ETH_RX_DESC_AVAIL;
3270 }
3271
3272 int
3273 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3274 {
3275         struct ixgbe_tx_queue *txq = tx_queue;
3276         volatile uint32_t *status;
3277         uint32_t desc;
3278
3279         if (unlikely(offset >= txq->nb_tx_desc))
3280                 return -EINVAL;
3281
3282         desc = txq->tx_tail + offset;
3283         /* go to next desc that has the RS bit */
3284         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3285                 txq->tx_rs_thresh;
3286         if (desc >= txq->nb_tx_desc) {
3287                 desc -= txq->nb_tx_desc;
3288                 if (desc >= txq->nb_tx_desc)
3289                         desc -= txq->nb_tx_desc;
3290         }
3291
3292         status = &txq->tx_ring[desc].wb.status;
3293         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3294                 return RTE_ETH_TX_DESC_DONE;
3295
3296         return RTE_ETH_TX_DESC_FULL;
3297 }
3298
3299 /*
3300  * Set up link loopback for X540/X550 mode Tx->Rx.
3301  */
3302 static inline void __attribute__((cold))
3303 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3304 {
3305         uint32_t macc;
3306         PMD_INIT_FUNC_TRACE();
3307
3308         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3309
3310         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3311                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3312         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3313
3314         if (enable) {
3315                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3316                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3317                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3318                 macc |= IXGBE_MACC_FLU;
3319         } else {
3320                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3321                 macc &= ~IXGBE_MACC_FLU;
3322         }
3323
3324         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3325                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3326
3327         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3328 }
3329
3330 void __attribute__((cold))
3331 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3332 {
3333         unsigned i;
3334         struct ixgbe_adapter *adapter = dev->data->dev_private;
3335         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3336
3337         PMD_INIT_FUNC_TRACE();
3338
3339         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3340                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3341
3342                 if (txq != NULL) {
3343                         txq->ops->release_mbufs(txq);
3344                         txq->ops->reset(txq);
3345                 }
3346         }
3347
3348         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3349                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3350
3351                 if (rxq != NULL) {
3352                         ixgbe_rx_queue_release_mbufs(rxq);
3353                         ixgbe_reset_rx_queue(adapter, rxq);
3354                 }
3355         }
3356         /* If loopback mode was enabled, reconfigure the link accordingly */
3357         if (dev->data->dev_conf.lpbk_mode != 0) {
3358                 if (hw->mac.type == ixgbe_mac_X540 ||
3359                      hw->mac.type == ixgbe_mac_X550 ||
3360                      hw->mac.type == ixgbe_mac_X550EM_x ||
3361                      hw->mac.type == ixgbe_mac_X550EM_a)
3362                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3363         }
3364 }
3365
3366 void
3367 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3368 {
3369         unsigned i;
3370
3371         PMD_INIT_FUNC_TRACE();
3372
3373         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3374                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3375                 dev->data->rx_queues[i] = NULL;
3376         }
3377         dev->data->nb_rx_queues = 0;
3378
3379         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3380                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3381                 dev->data->tx_queues[i] = NULL;
3382         }
3383         dev->data->nb_tx_queues = 0;
3384 }
3385
3386 /*********************************************************************
3387  *
3388  *  Device RX/TX init functions
3389  *
3390  **********************************************************************/
3391
3392 /**
3393  * Receive Side Scaling (RSS)
3394  * See section 7.1.2.8 in the following document:
3395  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3396  *
3397  * Principles:
3398  * The source and destination IP addresses of the IP header and the source
3399  * and destination ports of TCP/UDP headers, if any, of received packets are
3400  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3401  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3402  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3403  * RSS output index which is used as the RX queue index where to store the
3404  * received packets.
3405  * The following output is supplied in the RX write-back descriptor:
3406  *     - 32-bit result of the Microsoft RSS hash function,
3407  *     - 4-bit RSS type field.
3408  */
3409
3410 /*
3411  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3412  * Used as the default key.
3413  */
3414 static uint8_t rss_intel_key[40] = {
3415         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3416         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3417         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3418         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3419         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3420 };
3421
3422 static void
3423 ixgbe_rss_disable(struct rte_eth_dev *dev)
3424 {
3425         struct ixgbe_hw *hw;
3426         uint32_t mrqc;
3427         uint32_t mrqc_reg;
3428
3429         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3430         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3431         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3432         mrqc &= ~IXGBE_MRQC_RSSEN;
3433         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3434 }
3435
3436 static void
3437 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3438 {
3439         uint8_t  *hash_key;
3440         uint32_t mrqc;
3441         uint32_t rss_key;
3442         uint64_t rss_hf;
3443         uint16_t i;
3444         uint32_t mrqc_reg;
3445         uint32_t rssrk_reg;
3446
3447         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3448         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3449
3450         hash_key = rss_conf->rss_key;
3451         if (hash_key != NULL) {
3452                 /* Fill in RSS hash key */
3453                 for (i = 0; i < 10; i++) {
3454                         rss_key  = hash_key[(i * 4)];
3455                         rss_key |= hash_key[(i * 4) + 1] << 8;
3456                         rss_key |= hash_key[(i * 4) + 2] << 16;
3457                         rss_key |= hash_key[(i * 4) + 3] << 24;
3458                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3459                 }
3460         }
3461
3462         /* Set configured hashing protocols in MRQC register */
3463         rss_hf = rss_conf->rss_hf;
3464         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3465         if (rss_hf & ETH_RSS_IPV4)
3466                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3467         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3468                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3469         if (rss_hf & ETH_RSS_IPV6)
3470                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3471         if (rss_hf & ETH_RSS_IPV6_EX)
3472                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3473         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3474                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3475         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3476                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3477         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3478                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3479         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3480                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3481         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3482                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3483         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3484 }
3485
3486 int
3487 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3488                           struct rte_eth_rss_conf *rss_conf)
3489 {
3490         struct ixgbe_hw *hw;
3491         uint32_t mrqc;
3492         uint64_t rss_hf;
3493         uint32_t mrqc_reg;
3494
3495         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3496
3497         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3498                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3499                         "NIC.");
3500                 return -ENOTSUP;
3501         }
3502         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3503
3504         /*
3505          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3506          *     "RSS enabling cannot be done dynamically while it must be
3507          *      preceded by a software reset"
3508          * Before changing anything, first check that the update RSS operation
3509          * does not attempt to disable RSS, if RSS was enabled at
3510          * initialization time, or does not attempt to enable RSS, if RSS was
3511          * disabled at initialization time.
3512          */
3513         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3514         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3515         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3516                 if (rss_hf != 0) /* Enable RSS */
3517                         return -(EINVAL);
3518                 return 0; /* Nothing to do */
3519         }
3520         /* RSS enabled */
3521         if (rss_hf == 0) /* Disable RSS */
3522                 return -(EINVAL);
3523         ixgbe_hw_rss_hash_set(hw, rss_conf);
3524         return 0;
3525 }
3526
3527 int
3528 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3529                             struct rte_eth_rss_conf *rss_conf)
3530 {
3531         struct ixgbe_hw *hw;
3532         uint8_t *hash_key;
3533         uint32_t mrqc;
3534         uint32_t rss_key;
3535         uint64_t rss_hf;
3536         uint16_t i;
3537         uint32_t mrqc_reg;
3538         uint32_t rssrk_reg;
3539
3540         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3541         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3542         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3543         hash_key = rss_conf->rss_key;
3544         if (hash_key != NULL) {
3545                 /* Return RSS hash key */
3546                 for (i = 0; i < 10; i++) {
3547                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3548                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3549                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3550                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3551                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3552                 }
3553         }
3554
3555         /* Get RSS functions configured in MRQC register */
3556         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3557         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3558                 rss_conf->rss_hf = 0;
3559                 return 0;
3560         }
3561         rss_hf = 0;
3562         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3563                 rss_hf |= ETH_RSS_IPV4;
3564         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3565                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3566         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3567                 rss_hf |= ETH_RSS_IPV6;
3568         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3569                 rss_hf |= ETH_RSS_IPV6_EX;
3570         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3571                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3572         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3573                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3574         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3575                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3576         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3577                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3578         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3579                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3580         rss_conf->rss_hf = rss_hf;
3581         return 0;
3582 }
3583
3584 static void
3585 ixgbe_rss_configure(struct rte_eth_dev *dev)
3586 {
3587         struct rte_eth_rss_conf rss_conf;
3588         struct ixgbe_adapter *adapter;
3589         struct ixgbe_hw *hw;
3590         uint32_t reta;
3591         uint16_t i;
3592         uint16_t j;
3593         uint16_t sp_reta_size;
3594         uint32_t reta_reg;
3595
3596         PMD_INIT_FUNC_TRACE();
3597         adapter = dev->data->dev_private;
3598         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3599
3600         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3601
3602         /*
3603          * Fill in redirection table
3604          * The byte-swap is needed because NIC registers are in
3605          * little-endian order.
3606          */
3607         if (adapter->rss_reta_updated == 0) {
3608                 reta = 0;
3609                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3610                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3611
3612                         if (j == dev->data->nb_rx_queues)
3613                                 j = 0;
3614                         reta = (reta << 8) | j;
3615                         if ((i & 3) == 3)
3616                                 IXGBE_WRITE_REG(hw, reta_reg,
3617                                                 rte_bswap32(reta));
3618                 }
3619         }
3620
3621         /*
3622          * Configure the RSS key and the RSS protocols used to compute
3623          * the RSS hash of input packets.
3624          */
3625         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3626         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3627                 ixgbe_rss_disable(dev);
3628                 return;
3629         }
3630         if (rss_conf.rss_key == NULL)
3631                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3632         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3633 }
3634
3635 #define NUM_VFTA_REGISTERS 128
3636 #define NIC_RX_BUFFER_SIZE 0x200
3637 #define X550_RX_BUFFER_SIZE 0x180
3638
3639 static void
3640 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3641 {
3642         struct rte_eth_vmdq_dcb_conf *cfg;
3643         struct ixgbe_hw *hw;
3644         enum rte_eth_nb_pools num_pools;
3645         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3646         uint16_t pbsize;
3647         uint8_t nb_tcs; /* number of traffic classes */
3648         int i;
3649
3650         PMD_INIT_FUNC_TRACE();
3651         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3652         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3653         num_pools = cfg->nb_queue_pools;
3654         /* Check we have a valid number of pools */
3655         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3656                 ixgbe_rss_disable(dev);
3657                 return;
3658         }
3659         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3660         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3661
3662         /*
3663          * RXPBSIZE
3664          * split rx buffer up into sections, each for 1 traffic class
3665          */
3666         switch (hw->mac.type) {
3667         case ixgbe_mac_X550:
3668         case ixgbe_mac_X550EM_x:
3669         case ixgbe_mac_X550EM_a:
3670                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3671                 break;
3672         default:
3673                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3674                 break;
3675         }
3676         for (i = 0; i < nb_tcs; i++) {
3677                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3678
3679                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3680                 /* clear 10 bits. */
3681                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3682                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3683         }
3684         /* zero alloc all unused TCs */
3685         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3686                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3687
3688                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3689                 /* clear 10 bits. */
3690                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3691         }
3692
3693         /* MRQC: enable vmdq and dcb */
3694         mrqc = (num_pools == ETH_16_POOLS) ?
3695                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3696         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3697
3698         /* PFVTCTL: turn on virtualisation and set the default pool */
3699         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3700         if (cfg->enable_default_pool) {
3701                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3702         } else {
3703                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3704         }
3705
3706         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3707
3708         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3709         queue_mapping = 0;
3710         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3711                 /*
3712                  * mapping is done with 3 bits per priority,
3713                  * so shift by i*3 each time
3714                  */
3715                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3716
3717         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3718
3719         /* RTRPCS: DCB related */
3720         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3721
3722         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3723         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3724         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3725         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3726
3727         /* VFTA - enable all vlan filters */
3728         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3729                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3730         }
3731
3732         /* VFRE: pool enabling for receive - 16 or 32 */
3733         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3734                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3735
3736         /*
3737          * MPSAR - allow pools to read specific mac addresses
3738          * In this case, all pools should be able to read from mac addr 0
3739          */
3740         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3741         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3742
3743         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3744         for (i = 0; i < cfg->nb_pool_maps; i++) {
3745                 /* set vlan id in VF register and set the valid bit */
3746                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3747                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3748                 /*
3749                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3750                  * pools, we only need to use the first half of the register
3751                  * i.e. bits 0-31
3752                  */
3753                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3754         }
3755 }
3756
3757 /**
3758  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3759  * @dev: pointer to eth_dev structure
3760  * @dcb_config: pointer to ixgbe_dcb_config structure
3761  */
3762 static void
3763 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3764                        struct ixgbe_dcb_config *dcb_config)
3765 {
3766         uint32_t reg;
3767         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3768
3769         PMD_INIT_FUNC_TRACE();
3770         if (hw->mac.type != ixgbe_mac_82598EB) {
3771                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3772                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3773                 reg |= IXGBE_RTTDCS_ARBDIS;
3774                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3775
3776                 /* Enable DCB for Tx with 8 TCs */
3777                 if (dcb_config->num_tcs.pg_tcs == 8) {
3778                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3779                 } else {
3780                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3781                 }
3782                 if (dcb_config->vt_mode)
3783                         reg |= IXGBE_MTQC_VT_ENA;
3784                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3785
3786                 /* Enable the Tx desc arbiter */
3787                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3788                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3789                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3790
3791                 /* Enable Security TX Buffer IFG for DCB */
3792                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3793                 reg |= IXGBE_SECTX_DCB;
3794                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3795         }
3796 }
3797
3798 /**
3799  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3800  * @dev: pointer to rte_eth_dev structure
3801  * @dcb_config: pointer to ixgbe_dcb_config structure
3802  */
3803 static void
3804 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3805                         struct ixgbe_dcb_config *dcb_config)
3806 {
3807         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3808                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3809         struct ixgbe_hw *hw =
3810                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3811
3812         PMD_INIT_FUNC_TRACE();
3813         if (hw->mac.type != ixgbe_mac_82598EB)
3814                 /*PF VF Transmit Enable*/
3815                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3816                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3817
3818         /*Configure general DCB TX parameters*/
3819         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3820 }
3821
3822 static void
3823 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3824                         struct ixgbe_dcb_config *dcb_config)
3825 {
3826         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3827                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3828         struct ixgbe_dcb_tc_config *tc;
3829         uint8_t i, j;
3830
3831         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3832         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3833                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3834                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3835         } else {
3836                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3837                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3838         }
3839
3840         /* Initialize User Priority to Traffic Class mapping */
3841         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3842                 tc = &dcb_config->tc_config[j];
3843                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3844         }
3845
3846         /* User Priority to Traffic Class mapping */
3847         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3848                 j = vmdq_rx_conf->dcb_tc[i];
3849                 tc = &dcb_config->tc_config[j];
3850                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3851                                                 (uint8_t)(1 << i);
3852         }
3853 }
3854
3855 static void
3856 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3857                         struct ixgbe_dcb_config *dcb_config)
3858 {
3859         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3860                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3861         struct ixgbe_dcb_tc_config *tc;
3862         uint8_t i, j;
3863
3864         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3865         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3866                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3867                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3868         } else {
3869                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3870                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3871         }
3872
3873         /* Initialize User Priority to Traffic Class mapping */
3874         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3875                 tc = &dcb_config->tc_config[j];
3876                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3877         }
3878
3879         /* User Priority to Traffic Class mapping */
3880         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3881                 j = vmdq_tx_conf->dcb_tc[i];
3882                 tc = &dcb_config->tc_config[j];
3883                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3884                                                 (uint8_t)(1 << i);
3885         }
3886 }
3887
3888 static void
3889 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3890                 struct ixgbe_dcb_config *dcb_config)
3891 {
3892         struct rte_eth_dcb_rx_conf *rx_conf =
3893                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3894         struct ixgbe_dcb_tc_config *tc;
3895         uint8_t i, j;
3896
3897         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3898         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3899
3900         /* Initialize User Priority to Traffic Class mapping */
3901         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3902                 tc = &dcb_config->tc_config[j];
3903                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3904         }
3905
3906         /* User Priority to Traffic Class mapping */
3907         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3908                 j = rx_conf->dcb_tc[i];
3909                 tc = &dcb_config->tc_config[j];
3910                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3911                                                 (uint8_t)(1 << i);
3912         }
3913 }
3914
3915 static void
3916 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3917                 struct ixgbe_dcb_config *dcb_config)
3918 {
3919         struct rte_eth_dcb_tx_conf *tx_conf =
3920                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3921         struct ixgbe_dcb_tc_config *tc;
3922         uint8_t i, j;
3923
3924         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3925         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3926
3927         /* Initialize User Priority to Traffic Class mapping */
3928         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3929                 tc = &dcb_config->tc_config[j];
3930                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3931         }
3932
3933         /* User Priority to Traffic Class mapping */
3934         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3935                 j = tx_conf->dcb_tc[i];
3936                 tc = &dcb_config->tc_config[j];
3937                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3938                                                 (uint8_t)(1 << i);
3939         }
3940 }
3941
3942 /**
3943  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3944  * @dev: pointer to eth_dev structure
3945  * @dcb_config: pointer to ixgbe_dcb_config structure
3946  */
3947 static void
3948 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3949                        struct ixgbe_dcb_config *dcb_config)
3950 {
3951         uint32_t reg;
3952         uint32_t vlanctrl;
3953         uint8_t i;
3954         uint32_t q;
3955         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3956
3957         PMD_INIT_FUNC_TRACE();
3958         /*
3959          * Disable the arbiter before changing parameters
3960          * (always enable recycle mode; WSP)
3961          */
3962         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3963         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3964
3965         if (hw->mac.type != ixgbe_mac_82598EB) {
3966                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3967                 if (dcb_config->num_tcs.pg_tcs == 4) {
3968                         if (dcb_config->vt_mode)
3969                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3970                                         IXGBE_MRQC_VMDQRT4TCEN;
3971                         else {
3972                                 /* no matter the mode is DCB or DCB_RSS, just
3973                                  * set the MRQE to RSSXTCEN. RSS is controlled
3974                                  * by RSS_FIELD
3975                                  */
3976                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3977                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3978                                         IXGBE_MRQC_RTRSS4TCEN;
3979                         }
3980                 }
3981                 if (dcb_config->num_tcs.pg_tcs == 8) {
3982                         if (dcb_config->vt_mode)
3983                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3984                                         IXGBE_MRQC_VMDQRT8TCEN;
3985                         else {
3986                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3987                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3988                                         IXGBE_MRQC_RTRSS8TCEN;
3989                         }
3990                 }
3991
3992                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3993
3994                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3995                         /* Disable drop for all queues in VMDQ mode*/
3996                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3997                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3998                                                 (IXGBE_QDE_WRITE |
3999                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4000                 } else {
4001                         /* Enable drop for all queues in SRIOV mode */
4002                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4003                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4004                                                 (IXGBE_QDE_WRITE |
4005                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4006                                                  IXGBE_QDE_ENABLE));
4007                 }
4008         }
4009
4010         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4011         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4012         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4013         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4014
4015         /* VFTA - enable all vlan filters */
4016         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4017                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4018         }
4019
4020         /*
4021          * Configure Rx packet plane (recycle mode; WSP) and
4022          * enable arbiter
4023          */
4024         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4025         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4026 }
4027
4028 static void
4029 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4030                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4031 {
4032         switch (hw->mac.type) {
4033         case ixgbe_mac_82598EB:
4034                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4035                 break;
4036         case ixgbe_mac_82599EB:
4037         case ixgbe_mac_X540:
4038         case ixgbe_mac_X550:
4039         case ixgbe_mac_X550EM_x:
4040         case ixgbe_mac_X550EM_a:
4041                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4042                                                   tsa, map);
4043                 break;
4044         default:
4045                 break;
4046         }
4047 }
4048
4049 static void
4050 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4051                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4052 {
4053         switch (hw->mac.type) {
4054         case ixgbe_mac_82598EB:
4055                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4056                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4057                 break;
4058         case ixgbe_mac_82599EB:
4059         case ixgbe_mac_X540:
4060         case ixgbe_mac_X550:
4061         case ixgbe_mac_X550EM_x:
4062         case ixgbe_mac_X550EM_a:
4063                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4064                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4065                 break;
4066         default:
4067                 break;
4068         }
4069 }
4070
4071 #define DCB_RX_CONFIG  1
4072 #define DCB_TX_CONFIG  1
4073 #define DCB_TX_PB      1024
4074 /**
4075  * ixgbe_dcb_hw_configure - Enable DCB and configure
4076  * general DCB in VT mode and non-VT mode parameters
4077  * @dev: pointer to rte_eth_dev structure
4078  * @dcb_config: pointer to ixgbe_dcb_config structure
4079  */
4080 static int
4081 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4082                         struct ixgbe_dcb_config *dcb_config)
4083 {
4084         int     ret = 0;
4085         uint8_t i, pfc_en, nb_tcs;
4086         uint16_t pbsize, rx_buffer_size;
4087         uint8_t config_dcb_rx = 0;
4088         uint8_t config_dcb_tx = 0;
4089         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4090         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4091         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4092         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4093         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4094         struct ixgbe_dcb_tc_config *tc;
4095         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4096                 RTE_ETHER_CRC_LEN;
4097         struct ixgbe_hw *hw =
4098                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4099         struct ixgbe_bw_conf *bw_conf =
4100                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4101
4102         switch (dev->data->dev_conf.rxmode.mq_mode) {
4103         case ETH_MQ_RX_VMDQ_DCB:
4104                 dcb_config->vt_mode = true;
4105                 if (hw->mac.type != ixgbe_mac_82598EB) {
4106                         config_dcb_rx = DCB_RX_CONFIG;
4107                         /*
4108                          *get dcb and VT rx configuration parameters
4109                          *from rte_eth_conf
4110                          */
4111                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4112                         /*Configure general VMDQ and DCB RX parameters*/
4113                         ixgbe_vmdq_dcb_configure(dev);
4114                 }
4115                 break;
4116         case ETH_MQ_RX_DCB:
4117         case ETH_MQ_RX_DCB_RSS:
4118                 dcb_config->vt_mode = false;
4119                 config_dcb_rx = DCB_RX_CONFIG;
4120                 /* Get dcb TX configuration parameters from rte_eth_conf */
4121                 ixgbe_dcb_rx_config(dev, dcb_config);
4122                 /*Configure general DCB RX parameters*/
4123                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4124                 break;
4125         default:
4126                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4127                 break;
4128         }
4129         switch (dev->data->dev_conf.txmode.mq_mode) {
4130         case ETH_MQ_TX_VMDQ_DCB:
4131                 dcb_config->vt_mode = true;
4132                 config_dcb_tx = DCB_TX_CONFIG;
4133                 /* get DCB and VT TX configuration parameters
4134                  * from rte_eth_conf
4135                  */
4136                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4137                 /*Configure general VMDQ and DCB TX parameters*/
4138                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4139                 break;
4140
4141         case ETH_MQ_TX_DCB:
4142                 dcb_config->vt_mode = false;
4143                 config_dcb_tx = DCB_TX_CONFIG;
4144                 /*get DCB TX configuration parameters from rte_eth_conf*/
4145                 ixgbe_dcb_tx_config(dev, dcb_config);
4146                 /*Configure general DCB TX parameters*/
4147                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4148                 break;
4149         default:
4150                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4151                 break;
4152         }
4153
4154         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4155         /* Unpack map */
4156         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4157         if (nb_tcs == ETH_4_TCS) {
4158                 /* Avoid un-configured priority mapping to TC0 */
4159                 uint8_t j = 4;
4160                 uint8_t mask = 0xFF;
4161
4162                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4163                         mask = (uint8_t)(mask & (~(1 << map[i])));
4164                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4165                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4166                                 map[j++] = i;
4167                         mask >>= 1;
4168                 }
4169                 /* Re-configure 4 TCs BW */
4170                 for (i = 0; i < nb_tcs; i++) {
4171                         tc = &dcb_config->tc_config[i];
4172                         if (bw_conf->tc_num != nb_tcs)
4173                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4174                                         (uint8_t)(100 / nb_tcs);
4175                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4176                                                 (uint8_t)(100 / nb_tcs);
4177                 }
4178                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4179                         tc = &dcb_config->tc_config[i];
4180                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4181                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4182                 }
4183         } else {
4184                 /* Re-configure 8 TCs BW */
4185                 for (i = 0; i < nb_tcs; i++) {
4186                         tc = &dcb_config->tc_config[i];
4187                         if (bw_conf->tc_num != nb_tcs)
4188                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4189                                         (uint8_t)(100 / nb_tcs + (i & 1));
4190                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4191                                 (uint8_t)(100 / nb_tcs + (i & 1));
4192                 }
4193         }
4194
4195         switch (hw->mac.type) {
4196         case ixgbe_mac_X550:
4197         case ixgbe_mac_X550EM_x:
4198         case ixgbe_mac_X550EM_a:
4199                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4200                 break;
4201         default:
4202                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4203                 break;
4204         }
4205
4206         if (config_dcb_rx) {
4207                 /* Set RX buffer size */
4208                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4209                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4210
4211                 for (i = 0; i < nb_tcs; i++) {
4212                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4213                 }
4214                 /* zero alloc all unused TCs */
4215                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4216                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4217                 }
4218         }
4219         if (config_dcb_tx) {
4220                 /* Only support an equally distributed
4221                  *  Tx packet buffer strategy.
4222                  */
4223                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4224                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4225
4226                 for (i = 0; i < nb_tcs; i++) {
4227                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4228                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4229                 }
4230                 /* Clear unused TCs, if any, to zero buffer size*/
4231                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4232                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4233                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4234                 }
4235         }
4236
4237         /*Calculates traffic class credits*/
4238         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4239                                 IXGBE_DCB_TX_CONFIG);
4240         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4241                                 IXGBE_DCB_RX_CONFIG);
4242
4243         if (config_dcb_rx) {
4244                 /* Unpack CEE standard containers */
4245                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4246                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4247                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4248                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4249                 /* Configure PG(ETS) RX */
4250                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4251         }
4252
4253         if (config_dcb_tx) {
4254                 /* Unpack CEE standard containers */
4255                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4256                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4257                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4258                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4259                 /* Configure PG(ETS) TX */
4260                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4261         }
4262
4263         /*Configure queue statistics registers*/
4264         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4265
4266         /* Check if the PFC is supported */
4267         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4268                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4269                 for (i = 0; i < nb_tcs; i++) {
4270                         /*
4271                         * If the TC count is 8,and the default high_water is 48,
4272                         * the low_water is 16 as default.
4273                         */
4274                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4275                         hw->fc.low_water[i] = pbsize / 4;
4276                         /* Enable pfc for this TC */
4277                         tc = &dcb_config->tc_config[i];
4278                         tc->pfc = ixgbe_dcb_pfc_enabled;
4279                 }
4280                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4281                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4282                         pfc_en &= 0x0F;
4283                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4284         }
4285
4286         return ret;
4287 }
4288
4289 /**
4290  * ixgbe_configure_dcb - Configure DCB  Hardware
4291  * @dev: pointer to rte_eth_dev
4292  */
4293 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4294 {
4295         struct ixgbe_dcb_config *dcb_cfg =
4296                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4297         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4298
4299         PMD_INIT_FUNC_TRACE();
4300
4301         /* check support mq_mode for DCB */
4302         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4303             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4304             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4305                 return;
4306
4307         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4308                 return;
4309
4310         /** Configure DCB hardware **/
4311         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4312 }
4313
4314 /*
4315  * VMDq only support for 10 GbE NIC.
4316  */
4317 static void
4318 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4319 {
4320         struct rte_eth_vmdq_rx_conf *cfg;
4321         struct ixgbe_hw *hw;
4322         enum rte_eth_nb_pools num_pools;
4323         uint32_t mrqc, vt_ctl, vlanctrl;
4324         uint32_t vmolr = 0;
4325         int i;
4326
4327         PMD_INIT_FUNC_TRACE();
4328         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4329         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4330         num_pools = cfg->nb_queue_pools;
4331
4332         ixgbe_rss_disable(dev);
4333
4334         /* MRQC: enable vmdq */
4335         mrqc = IXGBE_MRQC_VMDQEN;
4336         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4337
4338         /* PFVTCTL: turn on virtualisation and set the default pool */
4339         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4340         if (cfg->enable_default_pool)
4341                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4342         else
4343                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4344
4345         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4346
4347         for (i = 0; i < (int)num_pools; i++) {
4348                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4349                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4350         }
4351
4352         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4353         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4354         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4355         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4356
4357         /* VFTA - enable all vlan filters */
4358         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4359                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4360
4361         /* VFRE: pool enabling for receive - 64 */
4362         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4363         if (num_pools == ETH_64_POOLS)
4364                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4365
4366         /*
4367          * MPSAR - allow pools to read specific mac addresses
4368          * In this case, all pools should be able to read from mac addr 0
4369          */
4370         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4371         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4372
4373         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4374         for (i = 0; i < cfg->nb_pool_maps; i++) {
4375                 /* set vlan id in VF register and set the valid bit */
4376                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4377                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4378                 /*
4379                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4380                  * pools, we only need to use the first half of the register
4381                  * i.e. bits 0-31
4382                  */
4383                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4384                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4385                                         (cfg->pool_map[i].pools & UINT32_MAX));
4386                 else
4387                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4388                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4389
4390         }
4391
4392         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4393         if (cfg->enable_loop_back) {
4394                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4395                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4396                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4397         }
4398
4399         IXGBE_WRITE_FLUSH(hw);
4400 }
4401
4402 /*
4403  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4404  * @hw: pointer to hardware structure
4405  */
4406 static void
4407 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4408 {
4409         uint32_t reg;
4410         uint32_t q;
4411
4412         PMD_INIT_FUNC_TRACE();
4413         /*PF VF Transmit Enable*/
4414         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4415         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4416
4417         /* Disable the Tx desc arbiter so that MTQC can be changed */
4418         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4419         reg |= IXGBE_RTTDCS_ARBDIS;
4420         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4421
4422         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4423         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4424
4425         /* Disable drop for all queues */
4426         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4427                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4428                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4429
4430         /* Enable the Tx desc arbiter */
4431         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4432         reg &= ~IXGBE_RTTDCS_ARBDIS;
4433         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4434
4435         IXGBE_WRITE_FLUSH(hw);
4436 }
4437
4438 static int __attribute__((cold))
4439 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4440 {
4441         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4442         uint64_t dma_addr;
4443         unsigned int i;
4444
4445         /* Initialize software ring entries */
4446         for (i = 0; i < rxq->nb_rx_desc; i++) {
4447                 volatile union ixgbe_adv_rx_desc *rxd;
4448                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4449
4450                 if (mbuf == NULL) {
4451                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4452                                      (unsigned) rxq->queue_id);
4453                         return -ENOMEM;
4454                 }
4455
4456                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4457                 mbuf->port = rxq->port_id;
4458
4459                 dma_addr =
4460                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4461                 rxd = &rxq->rx_ring[i];
4462                 rxd->read.hdr_addr = 0;
4463                 rxd->read.pkt_addr = dma_addr;
4464                 rxe[i].mbuf = mbuf;
4465         }
4466
4467         return 0;
4468 }
4469
4470 static int
4471 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4472 {
4473         struct ixgbe_hw *hw;
4474         uint32_t mrqc;
4475
4476         ixgbe_rss_configure(dev);
4477
4478         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4479
4480         /* MRQC: enable VF RSS */
4481         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4482         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4483         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4484         case ETH_64_POOLS:
4485                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4486                 break;
4487
4488         case ETH_32_POOLS:
4489                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4490                 break;
4491
4492         default:
4493                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4494                 return -EINVAL;
4495         }
4496
4497         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4498
4499         return 0;
4500 }
4501
4502 static int
4503 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4504 {
4505         struct ixgbe_hw *hw =
4506                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4507
4508         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4509         case ETH_64_POOLS:
4510                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4511                         IXGBE_MRQC_VMDQEN);
4512                 break;
4513
4514         case ETH_32_POOLS:
4515                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4516                         IXGBE_MRQC_VMDQRT4TCEN);
4517                 break;
4518
4519         case ETH_16_POOLS:
4520                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4521                         IXGBE_MRQC_VMDQRT8TCEN);
4522                 break;
4523         default:
4524                 PMD_INIT_LOG(ERR,
4525                         "invalid pool number in IOV mode");
4526                 break;
4527         }
4528         return 0;
4529 }
4530
4531 static int
4532 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4533 {
4534         struct ixgbe_hw *hw =
4535                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4536
4537         if (hw->mac.type == ixgbe_mac_82598EB)
4538                 return 0;
4539
4540         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4541                 /*
4542                  * SRIOV inactive scheme
4543                  * any DCB/RSS w/o VMDq multi-queue setting
4544                  */
4545                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4546                 case ETH_MQ_RX_RSS:
4547                 case ETH_MQ_RX_DCB_RSS:
4548                 case ETH_MQ_RX_VMDQ_RSS:
4549                         ixgbe_rss_configure(dev);
4550                         break;
4551
4552                 case ETH_MQ_RX_VMDQ_DCB:
4553                         ixgbe_vmdq_dcb_configure(dev);
4554                         break;
4555
4556                 case ETH_MQ_RX_VMDQ_ONLY:
4557                         ixgbe_vmdq_rx_hw_configure(dev);
4558                         break;
4559
4560                 case ETH_MQ_RX_NONE:
4561                 default:
4562                         /* if mq_mode is none, disable rss mode.*/
4563                         ixgbe_rss_disable(dev);
4564                         break;
4565                 }
4566         } else {
4567                 /* SRIOV active scheme
4568                  * Support RSS together with SRIOV.
4569                  */
4570                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4571                 case ETH_MQ_RX_RSS:
4572                 case ETH_MQ_RX_VMDQ_RSS:
4573                         ixgbe_config_vf_rss(dev);
4574                         break;
4575                 case ETH_MQ_RX_VMDQ_DCB:
4576                 case ETH_MQ_RX_DCB:
4577                 /* In SRIOV, the configuration is the same as VMDq case */
4578                         ixgbe_vmdq_dcb_configure(dev);
4579                         break;
4580                 /* DCB/RSS together with SRIOV is not supported */
4581                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4582                 case ETH_MQ_RX_DCB_RSS:
4583                         PMD_INIT_LOG(ERR,
4584                                 "Could not support DCB/RSS with VMDq & SRIOV");
4585                         return -1;
4586                 default:
4587                         ixgbe_config_vf_default(dev);
4588                         break;
4589                 }
4590         }
4591
4592         return 0;
4593 }
4594
4595 static int
4596 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4597 {
4598         struct ixgbe_hw *hw =
4599                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4600         uint32_t mtqc;
4601         uint32_t rttdcs;
4602
4603         if (hw->mac.type == ixgbe_mac_82598EB)
4604                 return 0;
4605
4606         /* disable arbiter before setting MTQC */
4607         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4608         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4609         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4610
4611         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4612                 /*
4613                  * SRIOV inactive scheme
4614                  * any DCB w/o VMDq multi-queue setting
4615                  */
4616                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4617                         ixgbe_vmdq_tx_hw_configure(hw);
4618                 else {
4619                         mtqc = IXGBE_MTQC_64Q_1PB;
4620                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4621                 }
4622         } else {
4623                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4624
4625                 /*
4626                  * SRIOV active scheme
4627                  * FIXME if support DCB together with VMDq & SRIOV
4628                  */
4629                 case ETH_64_POOLS:
4630                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4631                         break;
4632                 case ETH_32_POOLS:
4633                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4634                         break;
4635                 case ETH_16_POOLS:
4636                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4637                                 IXGBE_MTQC_8TC_8TQ;
4638                         break;
4639                 default:
4640                         mtqc = IXGBE_MTQC_64Q_1PB;
4641                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4642                 }
4643                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4644         }
4645
4646         /* re-enable arbiter */
4647         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4648         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4649
4650         return 0;
4651 }
4652
4653 /**
4654  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4655  *
4656  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4657  * spec rev. 3.0 chapter 8.2.3.8.13.
4658  *
4659  * @pool Memory pool of the Rx queue
4660  */
4661 static inline uint32_t
4662 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4663 {
4664         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4665
4666         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4667         uint16_t maxdesc =
4668                 RTE_IPV4_MAX_PKT_LEN /
4669                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4670
4671         if (maxdesc >= 16)
4672                 return IXGBE_RSCCTL_MAXDESC_16;
4673         else if (maxdesc >= 8)
4674                 return IXGBE_RSCCTL_MAXDESC_8;
4675         else if (maxdesc >= 4)
4676                 return IXGBE_RSCCTL_MAXDESC_4;
4677         else
4678                 return IXGBE_RSCCTL_MAXDESC_1;
4679 }
4680
4681 /**
4682  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4683  * interrupt
4684  *
4685  * (Taken from FreeBSD tree)
4686  * (yes this is all very magic and confusing :)
4687  *
4688  * @dev port handle
4689  * @entry the register array entry
4690  * @vector the MSIX vector for this queue
4691  * @type RX/TX/MISC
4692  */
4693 static void
4694 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4695 {
4696         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4697         u32 ivar, index;
4698
4699         vector |= IXGBE_IVAR_ALLOC_VAL;
4700
4701         switch (hw->mac.type) {
4702
4703         case ixgbe_mac_82598EB:
4704                 if (type == -1)
4705                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4706                 else
4707                         entry += (type * 64);
4708                 index = (entry >> 2) & 0x1F;
4709                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4710                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4711                 ivar |= (vector << (8 * (entry & 0x3)));
4712                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4713                 break;
4714
4715         case ixgbe_mac_82599EB:
4716         case ixgbe_mac_X540:
4717                 if (type == -1) { /* MISC IVAR */
4718                         index = (entry & 1) * 8;
4719                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4720                         ivar &= ~(0xFF << index);
4721                         ivar |= (vector << index);
4722                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4723                 } else {        /* RX/TX IVARS */
4724                         index = (16 * (entry & 1)) + (8 * type);
4725                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4726                         ivar &= ~(0xFF << index);
4727                         ivar |= (vector << index);
4728                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4729                 }
4730
4731                 break;
4732
4733         default:
4734                 break;
4735         }
4736 }
4737
4738 void __attribute__((cold))
4739 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4740 {
4741         uint16_t i, rx_using_sse;
4742         struct ixgbe_adapter *adapter = dev->data->dev_private;
4743
4744         /*
4745          * In order to allow Vector Rx there are a few configuration
4746          * conditions to be met and Rx Bulk Allocation should be allowed.
4747          */
4748         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4749             !adapter->rx_bulk_alloc_allowed) {
4750                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4751                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4752                                     "not enabled",
4753                              dev->data->port_id);
4754
4755                 adapter->rx_vec_allowed = false;
4756         }
4757
4758         /*
4759          * Initialize the appropriate LRO callback.
4760          *
4761          * If all queues satisfy the bulk allocation preconditions
4762          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4763          * Otherwise use a single allocation version.
4764          */
4765         if (dev->data->lro) {
4766                 if (adapter->rx_bulk_alloc_allowed) {
4767                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4768                                            "allocation version");
4769                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4770                 } else {
4771                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4772                                            "allocation version");
4773                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4774                 }
4775         } else if (dev->data->scattered_rx) {
4776                 /*
4777                  * Set the non-LRO scattered callback: there are Vector and
4778                  * single allocation versions.
4779                  */
4780                 if (adapter->rx_vec_allowed) {
4781                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4782                                             "callback (port=%d).",
4783                                      dev->data->port_id);
4784
4785                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4786                 } else if (adapter->rx_bulk_alloc_allowed) {
4787                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4788                                            "allocation callback (port=%d).",
4789                                      dev->data->port_id);
4790                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4791                 } else {
4792                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4793                                             "single allocation) "
4794                                             "Scattered Rx callback "
4795                                             "(port=%d).",
4796                                      dev->data->port_id);
4797
4798                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4799                 }
4800         /*
4801          * Below we set "simple" callbacks according to port/queues parameters.
4802          * If parameters allow we are going to choose between the following
4803          * callbacks:
4804          *    - Vector
4805          *    - Bulk Allocation
4806          *    - Single buffer allocation (the simplest one)
4807          */
4808         } else if (adapter->rx_vec_allowed) {
4809                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4810                                     "burst size no less than %d (port=%d).",
4811                              RTE_IXGBE_DESCS_PER_LOOP,
4812                              dev->data->port_id);
4813
4814                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4815         } else if (adapter->rx_bulk_alloc_allowed) {
4816                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4817                                     "satisfied. Rx Burst Bulk Alloc function "
4818                                     "will be used on port=%d.",
4819                              dev->data->port_id);
4820
4821                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4822         } else {
4823                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4824                                     "satisfied, or Scattered Rx is requested "
4825                                     "(port=%d).",
4826                              dev->data->port_id);
4827
4828                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4829         }
4830
4831         /* Propagate information about RX function choice through all queues. */
4832
4833         rx_using_sse =
4834                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4835                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4836
4837         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4838                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4839
4840                 rxq->rx_using_sse = rx_using_sse;
4841 #ifdef RTE_LIBRTE_SECURITY
4842                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4843                                 DEV_RX_OFFLOAD_SECURITY);
4844 #endif
4845         }
4846 }
4847
4848 /**
4849  * ixgbe_set_rsc - configure RSC related port HW registers
4850  *
4851  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4852  * of 82599 Spec (x540 configuration is virtually the same).
4853  *
4854  * @dev port handle
4855  *
4856  * Returns 0 in case of success or a non-zero error code
4857  */
4858 static int
4859 ixgbe_set_rsc(struct rte_eth_dev *dev)
4860 {
4861         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4862         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4863         struct rte_eth_dev_info dev_info = { 0 };
4864         bool rsc_capable = false;
4865         uint16_t i;
4866         uint32_t rdrxctl;
4867         uint32_t rfctl;
4868
4869         /* Sanity check */
4870         dev->dev_ops->dev_infos_get(dev, &dev_info);
4871         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4872                 rsc_capable = true;
4873
4874         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4875                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4876                                    "support it");
4877                 return -EINVAL;
4878         }
4879
4880         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4881
4882         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4883              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4884                 /*
4885                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4886                  * 3.0 RSC configuration requires HW CRC stripping being
4887                  * enabled. If user requested both HW CRC stripping off
4888                  * and RSC on - return an error.
4889                  */
4890                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4891                                     "is disabled");
4892                 return -EINVAL;
4893         }
4894
4895         /* RFCTL configuration  */
4896         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4897         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4898                 /*
4899                  * Since NFS packets coalescing is not supported - clear
4900                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4901                  * enabled.
4902                  */
4903                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4904                            IXGBE_RFCTL_NFSR_DIS);
4905         else
4906                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4907         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4908
4909         /* If LRO hasn't been requested - we are done here. */
4910         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4911                 return 0;
4912
4913         /* Set RDRXCTL.RSCACKC bit */
4914         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4915         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4916         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4917
4918         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4919         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4920                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4921                 uint32_t srrctl =
4922                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4923                 uint32_t rscctl =
4924                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4925                 uint32_t psrtype =
4926                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4927                 uint32_t eitr =
4928                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4929
4930                 /*
4931                  * ixgbe PMD doesn't support header-split at the moment.
4932                  *
4933                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4934                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4935                  * should be configured even if header split is not
4936                  * enabled. We will configure it 128 bytes following the
4937                  * recommendation in the spec.
4938                  */
4939                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4940                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4941                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4942
4943                 /*
4944                  * TODO: Consider setting the Receive Descriptor Minimum
4945                  * Threshold Size for an RSC case. This is not an obviously
4946                  * beneficiary option but the one worth considering...
4947                  */
4948
4949                 rscctl |= IXGBE_RSCCTL_RSCEN;
4950                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4951                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4952
4953                 /*
4954                  * RSC: Set ITR interval corresponding to 2K ints/s.
4955                  *
4956                  * Full-sized RSC aggregations for a 10Gb/s link will
4957                  * arrive at about 20K aggregation/s rate.
4958                  *
4959                  * 2K inst/s rate will make only 10% of the
4960                  * aggregations to be closed due to the interrupt timer
4961                  * expiration for a streaming at wire-speed case.
4962                  *
4963                  * For a sparse streaming case this setting will yield
4964                  * at most 500us latency for a single RSC aggregation.
4965                  */
4966                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4967                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4968                 eitr |= IXGBE_EITR_CNT_WDIS;
4969
4970                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4971                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4972                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4973                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4974
4975                 /*
4976                  * RSC requires the mapping of the queue to the
4977                  * interrupt vector.
4978                  */
4979                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4980         }
4981
4982         dev->data->lro = 1;
4983
4984         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4985
4986         return 0;
4987 }
4988
4989 /*
4990  * Initializes Receive Unit.
4991  */
4992 int __attribute__((cold))
4993 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4994 {
4995         struct ixgbe_hw     *hw;
4996         struct ixgbe_rx_queue *rxq;
4997         uint64_t bus_addr;
4998         uint32_t rxctrl;
4999         uint32_t fctrl;
5000         uint32_t hlreg0;
5001         uint32_t maxfrs;
5002         uint32_t srrctl;
5003         uint32_t rdrxctl;
5004         uint32_t rxcsum;
5005         uint16_t buf_size;
5006         uint16_t i;
5007         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5008         int rc;
5009
5010         PMD_INIT_FUNC_TRACE();
5011         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5012
5013         /*
5014          * Make sure receives are disabled while setting
5015          * up the RX context (registers, descriptor rings, etc.).
5016          */
5017         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5018         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5019
5020         /* Enable receipt of broadcasted frames */
5021         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5022         fctrl |= IXGBE_FCTRL_BAM;
5023         fctrl |= IXGBE_FCTRL_DPF;
5024         fctrl |= IXGBE_FCTRL_PMCF;
5025         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5026
5027         /*
5028          * Configure CRC stripping, if any.
5029          */
5030         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5031         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5032                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5033         else
5034                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5035
5036         /*
5037          * Configure jumbo frame support, if any.
5038          */
5039         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5040                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5041                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5042                 maxfrs &= 0x0000FFFF;
5043                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5044                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5045         } else
5046                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5047
5048         /*
5049          * If loopback mode is configured, set LPBK bit.
5050          */
5051         if (dev->data->dev_conf.lpbk_mode != 0) {
5052                 rc = ixgbe_check_supported_loopback_mode(dev);
5053                 if (rc < 0) {
5054                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5055                         return rc;
5056                 }
5057                 hlreg0 |= IXGBE_HLREG0_LPBK;
5058         } else {
5059                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5060         }
5061
5062         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5063
5064         /*
5065          * Assume no header split and no VLAN strip support
5066          * on any Rx queue first .
5067          */
5068         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5069         /* Setup RX queues */
5070         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5071                 rxq = dev->data->rx_queues[i];
5072
5073                 /*
5074                  * Reset crc_len in case it was changed after queue setup by a
5075                  * call to configure.
5076                  */
5077                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5078                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5079                 else
5080                         rxq->crc_len = 0;
5081
5082                 /* Setup the Base and Length of the Rx Descriptor Rings */
5083                 bus_addr = rxq->rx_ring_phys_addr;
5084                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5085                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5086                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5087                                 (uint32_t)(bus_addr >> 32));
5088                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5089                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5090                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5091                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5092
5093                 /* Configure the SRRCTL register */
5094                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5095
5096                 /* Set if packets are dropped when no descriptors available */
5097                 if (rxq->drop_en)
5098                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5099
5100                 /*
5101                  * Configure the RX buffer size in the BSIZEPACKET field of
5102                  * the SRRCTL register of the queue.
5103                  * The value is in 1 KB resolution. Valid values can be from
5104                  * 1 KB to 16 KB.
5105                  */
5106                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5107                         RTE_PKTMBUF_HEADROOM);
5108                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5109                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5110
5111                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5112
5113                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5114                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5115
5116                 /* It adds dual VLAN length for supporting dual VLAN */
5117                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5118                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5119                         dev->data->scattered_rx = 1;
5120                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5121                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5122         }
5123
5124         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5125                 dev->data->scattered_rx = 1;
5126
5127         /*
5128          * Device configured with multiple RX queues.
5129          */
5130         ixgbe_dev_mq_rx_configure(dev);
5131
5132         /*
5133          * Setup the Checksum Register.
5134          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5135          * Enable IP/L4 checkum computation by hardware if requested to do so.
5136          */
5137         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5138         rxcsum |= IXGBE_RXCSUM_PCSD;
5139         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5140                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5141         else
5142                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5143
5144         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5145
5146         if (hw->mac.type == ixgbe_mac_82599EB ||
5147             hw->mac.type == ixgbe_mac_X540) {
5148                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5149                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5150                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5151                 else
5152                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5153                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5154                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5155         }
5156
5157         rc = ixgbe_set_rsc(dev);
5158         if (rc)
5159                 return rc;
5160
5161         ixgbe_set_rx_function(dev);
5162
5163         return 0;
5164 }
5165
5166 /*
5167  * Initializes Transmit Unit.
5168  */
5169 void __attribute__((cold))
5170 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5171 {
5172         struct ixgbe_hw     *hw;
5173         struct ixgbe_tx_queue *txq;
5174         uint64_t bus_addr;
5175         uint32_t hlreg0;
5176         uint32_t txctrl;
5177         uint16_t i;
5178
5179         PMD_INIT_FUNC_TRACE();
5180         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5181
5182         /* Enable TX CRC (checksum offload requirement) and hw padding
5183          * (TSO requirement)
5184          */
5185         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5186         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5187         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5188
5189         /* Setup the Base and Length of the Tx Descriptor Rings */
5190         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5191                 txq = dev->data->tx_queues[i];
5192
5193                 bus_addr = txq->tx_ring_phys_addr;
5194                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5195                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5196                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5197                                 (uint32_t)(bus_addr >> 32));
5198                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5199                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5200                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5201                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5202                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5203
5204                 /*
5205                  * Disable Tx Head Writeback RO bit, since this hoses
5206                  * bookkeeping if things aren't delivered in order.
5207                  */
5208                 switch (hw->mac.type) {
5209                 case ixgbe_mac_82598EB:
5210                         txctrl = IXGBE_READ_REG(hw,
5211                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5212                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5213                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5214                                         txctrl);
5215                         break;
5216
5217                 case ixgbe_mac_82599EB:
5218                 case ixgbe_mac_X540:
5219                 case ixgbe_mac_X550:
5220                 case ixgbe_mac_X550EM_x:
5221                 case ixgbe_mac_X550EM_a:
5222                 default:
5223                         txctrl = IXGBE_READ_REG(hw,
5224                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5225                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5226                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5227                                         txctrl);
5228                         break;
5229                 }
5230         }
5231
5232         /* Device configured with multiple TX queues. */
5233         ixgbe_dev_mq_tx_configure(dev);
5234 }
5235
5236 /*
5237  * Check if requested loopback mode is supported
5238  */
5239 int
5240 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5241 {
5242         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5243
5244         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5245                 if (hw->mac.type == ixgbe_mac_82599EB ||
5246                      hw->mac.type == ixgbe_mac_X540 ||
5247                      hw->mac.type == ixgbe_mac_X550 ||
5248                      hw->mac.type == ixgbe_mac_X550EM_x ||
5249                      hw->mac.type == ixgbe_mac_X550EM_a)
5250                         return 0;
5251
5252         return -ENOTSUP;
5253 }
5254
5255 /*
5256  * Set up link for 82599 loopback mode Tx->Rx.
5257  */
5258 static inline void __attribute__((cold))
5259 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5260 {
5261         PMD_INIT_FUNC_TRACE();
5262
5263         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5264                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5265                                 IXGBE_SUCCESS) {
5266                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5267                         /* ignore error */
5268                         return;
5269                 }
5270         }
5271
5272         /* Restart link */
5273         IXGBE_WRITE_REG(hw,
5274                         IXGBE_AUTOC,
5275                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5276         ixgbe_reset_pipeline_82599(hw);
5277
5278         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5279         msec_delay(50);
5280 }
5281
5282
5283 /*
5284  * Start Transmit and Receive Units.
5285  */
5286 int __attribute__((cold))
5287 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5288 {
5289         struct ixgbe_hw     *hw;
5290         struct ixgbe_tx_queue *txq;
5291         struct ixgbe_rx_queue *rxq;
5292         uint32_t txdctl;
5293         uint32_t dmatxctl;
5294         uint32_t rxctrl;
5295         uint16_t i;
5296         int ret = 0;
5297
5298         PMD_INIT_FUNC_TRACE();
5299         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5300
5301         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5302                 txq = dev->data->tx_queues[i];
5303                 /* Setup Transmit Threshold Registers */
5304                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5305                 txdctl |= txq->pthresh & 0x7F;
5306                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5307                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5308                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5309         }
5310
5311         if (hw->mac.type != ixgbe_mac_82598EB) {
5312                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5313                 dmatxctl |= IXGBE_DMATXCTL_TE;
5314                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5315         }
5316
5317         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5318                 txq = dev->data->tx_queues[i];
5319                 if (!txq->tx_deferred_start) {
5320                         ret = ixgbe_dev_tx_queue_start(dev, i);
5321                         if (ret < 0)
5322                                 return ret;
5323                 }
5324         }
5325
5326         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5327                 rxq = dev->data->rx_queues[i];
5328                 if (!rxq->rx_deferred_start) {
5329                         ret = ixgbe_dev_rx_queue_start(dev, i);
5330                         if (ret < 0)
5331                                 return ret;
5332                 }
5333         }
5334
5335         /* Enable Receive engine */
5336         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5337         if (hw->mac.type == ixgbe_mac_82598EB)
5338                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5339         rxctrl |= IXGBE_RXCTRL_RXEN;
5340         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5341
5342         /* If loopback mode is enabled, set up the link accordingly */
5343         if (dev->data->dev_conf.lpbk_mode != 0) {
5344                 if (hw->mac.type == ixgbe_mac_82599EB)
5345                         ixgbe_setup_loopback_link_82599(hw);
5346                 else if (hw->mac.type == ixgbe_mac_X540 ||
5347                      hw->mac.type == ixgbe_mac_X550 ||
5348                      hw->mac.type == ixgbe_mac_X550EM_x ||
5349                      hw->mac.type == ixgbe_mac_X550EM_a)
5350                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5351         }
5352
5353 #ifdef RTE_LIBRTE_SECURITY
5354         if ((dev->data->dev_conf.rxmode.offloads &
5355                         DEV_RX_OFFLOAD_SECURITY) ||
5356                 (dev->data->dev_conf.txmode.offloads &
5357                         DEV_TX_OFFLOAD_SECURITY)) {
5358                 ret = ixgbe_crypto_enable_ipsec(dev);
5359                 if (ret != 0) {
5360                         PMD_DRV_LOG(ERR,
5361                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5362                                     ret);
5363                         return ret;
5364                 }
5365         }
5366 #endif
5367
5368         return 0;
5369 }
5370
5371 /*
5372  * Start Receive Units for specified queue.
5373  */
5374 int __attribute__((cold))
5375 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5376 {
5377         struct ixgbe_hw     *hw;
5378         struct ixgbe_rx_queue *rxq;
5379         uint32_t rxdctl;
5380         int poll_ms;
5381
5382         PMD_INIT_FUNC_TRACE();
5383         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5384
5385         rxq = dev->data->rx_queues[rx_queue_id];
5386
5387         /* Allocate buffers for descriptor rings */
5388         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5389                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5390                              rx_queue_id);
5391                 return -1;
5392         }
5393         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5394         rxdctl |= IXGBE_RXDCTL_ENABLE;
5395         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5396
5397         /* Wait until RX Enable ready */
5398         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5399         do {
5400                 rte_delay_ms(1);
5401                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5402         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5403         if (!poll_ms)
5404                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5405         rte_wmb();
5406         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5407         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5408         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5409
5410         return 0;
5411 }
5412
5413 /*
5414  * Stop Receive Units for specified queue.
5415  */
5416 int __attribute__((cold))
5417 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5418 {
5419         struct ixgbe_hw     *hw;
5420         struct ixgbe_adapter *adapter = dev->data->dev_private;
5421         struct ixgbe_rx_queue *rxq;
5422         uint32_t rxdctl;
5423         int poll_ms;
5424
5425         PMD_INIT_FUNC_TRACE();
5426         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5427
5428         rxq = dev->data->rx_queues[rx_queue_id];
5429
5430         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5431         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5432         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5433
5434         /* Wait until RX Enable bit clear */
5435         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5436         do {
5437                 rte_delay_ms(1);
5438                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5439         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5440         if (!poll_ms)
5441                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5442
5443         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5444
5445         ixgbe_rx_queue_release_mbufs(rxq);
5446         ixgbe_reset_rx_queue(adapter, rxq);
5447         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5448
5449         return 0;
5450 }
5451
5452
5453 /*
5454  * Start Transmit Units for specified queue.
5455  */
5456 int __attribute__((cold))
5457 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5458 {
5459         struct ixgbe_hw     *hw;
5460         struct ixgbe_tx_queue *txq;
5461         uint32_t txdctl;
5462         int poll_ms;
5463
5464         PMD_INIT_FUNC_TRACE();
5465         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5466
5467         txq = dev->data->tx_queues[tx_queue_id];
5468         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5469         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5470         txdctl |= IXGBE_TXDCTL_ENABLE;
5471         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5472
5473         /* Wait until TX Enable ready */
5474         if (hw->mac.type == ixgbe_mac_82599EB) {
5475                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5476                 do {
5477                         rte_delay_ms(1);
5478                         txdctl = IXGBE_READ_REG(hw,
5479                                 IXGBE_TXDCTL(txq->reg_idx));
5480                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5481                 if (!poll_ms)
5482                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5483                                 tx_queue_id);
5484         }
5485         rte_wmb();
5486         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5487         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5488
5489         return 0;
5490 }
5491
5492 /*
5493  * Stop Transmit Units for specified queue.
5494  */
5495 int __attribute__((cold))
5496 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5497 {
5498         struct ixgbe_hw     *hw;
5499         struct ixgbe_tx_queue *txq;
5500         uint32_t txdctl;
5501         uint32_t txtdh, txtdt;
5502         int poll_ms;
5503
5504         PMD_INIT_FUNC_TRACE();
5505         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5506
5507         txq = dev->data->tx_queues[tx_queue_id];
5508
5509         /* Wait until TX queue is empty */
5510         if (hw->mac.type == ixgbe_mac_82599EB) {
5511                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5512                 do {
5513                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5514                         txtdh = IXGBE_READ_REG(hw,
5515                                                IXGBE_TDH(txq->reg_idx));
5516                         txtdt = IXGBE_READ_REG(hw,
5517                                                IXGBE_TDT(txq->reg_idx));
5518                 } while (--poll_ms && (txtdh != txtdt));
5519                 if (!poll_ms)
5520                         PMD_INIT_LOG(ERR,
5521                                 "Tx Queue %d is not empty when stopping.",
5522                                 tx_queue_id);
5523         }
5524
5525         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5526         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5527         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5528
5529         /* Wait until TX Enable bit clear */
5530         if (hw->mac.type == ixgbe_mac_82599EB) {
5531                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5532                 do {
5533                         rte_delay_ms(1);
5534                         txdctl = IXGBE_READ_REG(hw,
5535                                                 IXGBE_TXDCTL(txq->reg_idx));
5536                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5537                 if (!poll_ms)
5538                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5539                                 tx_queue_id);
5540         }
5541
5542         if (txq->ops != NULL) {
5543                 txq->ops->release_mbufs(txq);
5544                 txq->ops->reset(txq);
5545         }
5546         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5547
5548         return 0;
5549 }
5550
5551 void
5552 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5553         struct rte_eth_rxq_info *qinfo)
5554 {
5555         struct ixgbe_rx_queue *rxq;
5556
5557         rxq = dev->data->rx_queues[queue_id];
5558
5559         qinfo->mp = rxq->mb_pool;
5560         qinfo->scattered_rx = dev->data->scattered_rx;
5561         qinfo->nb_desc = rxq->nb_rx_desc;
5562
5563         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5564         qinfo->conf.rx_drop_en = rxq->drop_en;
5565         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5566         qinfo->conf.offloads = rxq->offloads;
5567 }
5568
5569 void
5570 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5571         struct rte_eth_txq_info *qinfo)
5572 {
5573         struct ixgbe_tx_queue *txq;
5574
5575         txq = dev->data->tx_queues[queue_id];
5576
5577         qinfo->nb_desc = txq->nb_tx_desc;
5578
5579         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5580         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5581         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5582
5583         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5584         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5585         qinfo->conf.offloads = txq->offloads;
5586         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5587 }
5588
5589 /*
5590  * [VF] Initializes Receive Unit.
5591  */
5592 int __attribute__((cold))
5593 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5594 {
5595         struct ixgbe_hw     *hw;
5596         struct ixgbe_rx_queue *rxq;
5597         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5598         uint64_t bus_addr;
5599         uint32_t srrctl, psrtype = 0;
5600         uint16_t buf_size;
5601         uint16_t i;
5602         int ret;
5603
5604         PMD_INIT_FUNC_TRACE();
5605         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5606
5607         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5608                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5609                         "it should be power of 2");
5610                 return -1;
5611         }
5612
5613         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5614                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5615                         "it should be equal to or less than %d",
5616                         hw->mac.max_rx_queues);
5617                 return -1;
5618         }
5619
5620         /*
5621          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5622          * disables the VF receipt of packets if the PF MTU is > 1500.
5623          * This is done to deal with 82599 limitations that imposes
5624          * the PF and all VFs to share the same MTU.
5625          * Then, the PF driver enables again the VF receipt of packet when
5626          * the VF driver issues a IXGBE_VF_SET_LPE request.
5627          * In the meantime, the VF device cannot be used, even if the VF driver
5628          * and the Guest VM network stack are ready to accept packets with a
5629          * size up to the PF MTU.
5630          * As a work-around to this PF behaviour, force the call to
5631          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5632          * VF packets received can work in all cases.
5633          */
5634         ixgbevf_rlpml_set_vf(hw,
5635                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5636
5637         /*
5638          * Assume no header split and no VLAN strip support
5639          * on any Rx queue first .
5640          */
5641         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5642         /* Setup RX queues */
5643         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5644                 rxq = dev->data->rx_queues[i];
5645
5646                 /* Allocate buffers for descriptor rings */
5647                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5648                 if (ret)
5649                         return ret;
5650
5651                 /* Setup the Base and Length of the Rx Descriptor Rings */
5652                 bus_addr = rxq->rx_ring_phys_addr;
5653
5654                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5655                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5656                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5657                                 (uint32_t)(bus_addr >> 32));
5658                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5659                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5660                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5661                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5662
5663
5664                 /* Configure the SRRCTL register */
5665                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5666
5667                 /* Set if packets are dropped when no descriptors available */
5668                 if (rxq->drop_en)
5669                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5670
5671                 /*
5672                  * Configure the RX buffer size in the BSIZEPACKET field of
5673                  * the SRRCTL register of the queue.
5674                  * The value is in 1 KB resolution. Valid values can be from
5675                  * 1 KB to 16 KB.
5676                  */
5677                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5678                         RTE_PKTMBUF_HEADROOM);
5679                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5680                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5681
5682                 /*
5683                  * VF modification to write virtual function SRRCTL register
5684                  */
5685                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5686
5687                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5688                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5689
5690                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5691                     /* It adds dual VLAN length for supporting dual VLAN */
5692                     (rxmode->max_rx_pkt_len +
5693                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5694                         if (!dev->data->scattered_rx)
5695                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5696                         dev->data->scattered_rx = 1;
5697                 }
5698
5699                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5700                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5701         }
5702
5703         /* Set RQPL for VF RSS according to max Rx queue */
5704         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5705                 IXGBE_PSRTYPE_RQPL_SHIFT;
5706         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5707
5708         ixgbe_set_rx_function(dev);
5709
5710         return 0;
5711 }
5712
5713 /*
5714  * [VF] Initializes Transmit Unit.
5715  */
5716 void __attribute__((cold))
5717 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5718 {
5719         struct ixgbe_hw     *hw;
5720         struct ixgbe_tx_queue *txq;
5721         uint64_t bus_addr;
5722         uint32_t txctrl;
5723         uint16_t i;
5724
5725         PMD_INIT_FUNC_TRACE();
5726         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5727
5728         /* Setup the Base and Length of the Tx Descriptor Rings */
5729         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5730                 txq = dev->data->tx_queues[i];
5731                 bus_addr = txq->tx_ring_phys_addr;
5732                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5733                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5734                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5735                                 (uint32_t)(bus_addr >> 32));
5736                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5737                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5738                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5739                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5740                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5741
5742                 /*
5743                  * Disable Tx Head Writeback RO bit, since this hoses
5744                  * bookkeeping if things aren't delivered in order.
5745                  */
5746                 txctrl = IXGBE_READ_REG(hw,
5747                                 IXGBE_VFDCA_TXCTRL(i));
5748                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5749                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5750                                 txctrl);
5751         }
5752 }
5753
5754 /*
5755  * [VF] Start Transmit and Receive Units.
5756  */
5757 void __attribute__((cold))
5758 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5759 {
5760         struct ixgbe_hw     *hw;
5761         struct ixgbe_tx_queue *txq;
5762         struct ixgbe_rx_queue *rxq;
5763         uint32_t txdctl;
5764         uint32_t rxdctl;
5765         uint16_t i;
5766         int poll_ms;
5767
5768         PMD_INIT_FUNC_TRACE();
5769         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5770
5771         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5772                 txq = dev->data->tx_queues[i];
5773                 /* Setup Transmit Threshold Registers */
5774                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5775                 txdctl |= txq->pthresh & 0x7F;
5776                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5777                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5778                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5779         }
5780
5781         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5782
5783                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5784                 txdctl |= IXGBE_TXDCTL_ENABLE;
5785                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5786
5787                 poll_ms = 10;
5788                 /* Wait until TX Enable ready */
5789                 do {
5790                         rte_delay_ms(1);
5791                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5792                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5793                 if (!poll_ms)
5794                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5795         }
5796         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5797
5798                 rxq = dev->data->rx_queues[i];
5799
5800                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5801                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5802                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5803
5804                 /* Wait until RX Enable ready */
5805                 poll_ms = 10;
5806                 do {
5807                         rte_delay_ms(1);
5808                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5809                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5810                 if (!poll_ms)
5811                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5812                 rte_wmb();
5813                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5814
5815         }
5816 }
5817
5818 int
5819 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5820                     const struct rte_flow_action_rss *in)
5821 {
5822         if (in->key_len > RTE_DIM(out->key) ||
5823             in->queue_num > RTE_DIM(out->queue))
5824                 return -EINVAL;
5825         out->conf = (struct rte_flow_action_rss){
5826                 .func = in->func,
5827                 .level = in->level,
5828                 .types = in->types,
5829                 .key_len = in->key_len,
5830                 .queue_num = in->queue_num,
5831                 .key = memcpy(out->key, in->key, in->key_len),
5832                 .queue = memcpy(out->queue, in->queue,
5833                                 sizeof(*in->queue) * in->queue_num),
5834         };
5835         return 0;
5836 }
5837
5838 int
5839 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5840                       const struct rte_flow_action_rss *with)
5841 {
5842         return (comp->func == with->func &&
5843                 comp->level == with->level &&
5844                 comp->types == with->types &&
5845                 comp->key_len == with->key_len &&
5846                 comp->queue_num == with->queue_num &&
5847                 !memcmp(comp->key, with->key, with->key_len) &&
5848                 !memcmp(comp->queue, with->queue,
5849                         sizeof(*with->queue) * with->queue_num));
5850 }
5851
5852 int
5853 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5854                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5855 {
5856         struct ixgbe_hw *hw;
5857         uint32_t reta;
5858         uint16_t i;
5859         uint16_t j;
5860         uint16_t sp_reta_size;
5861         uint32_t reta_reg;
5862         struct rte_eth_rss_conf rss_conf = {
5863                 .rss_key = conf->conf.key_len ?
5864                         (void *)(uintptr_t)conf->conf.key : NULL,
5865                 .rss_key_len = conf->conf.key_len,
5866                 .rss_hf = conf->conf.types,
5867         };
5868         struct ixgbe_filter_info *filter_info =
5869                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5870
5871         PMD_INIT_FUNC_TRACE();
5872         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5873
5874         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5875
5876         if (!add) {
5877                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5878                                           &conf->conf)) {
5879                         ixgbe_rss_disable(dev);
5880                         memset(&filter_info->rss_info, 0,
5881                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5882                         return 0;
5883                 }
5884                 return -EINVAL;
5885         }
5886
5887         if (filter_info->rss_info.conf.queue_num)
5888                 return -EINVAL;
5889         /* Fill in redirection table
5890          * The byte-swap is needed because NIC registers are in
5891          * little-endian order.
5892          */
5893         reta = 0;
5894         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5895                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5896
5897                 if (j == conf->conf.queue_num)
5898                         j = 0;
5899                 reta = (reta << 8) | conf->conf.queue[j];
5900                 if ((i & 3) == 3)
5901                         IXGBE_WRITE_REG(hw, reta_reg,
5902                                         rte_bswap32(reta));
5903         }
5904
5905         /* Configure the RSS key and the RSS protocols used to compute
5906          * the RSS hash of input packets.
5907          */
5908         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5909                 ixgbe_rss_disable(dev);
5910                 return 0;
5911         }
5912         if (rss_conf.rss_key == NULL)
5913                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5914         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5915
5916         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5917                 return -EINVAL;
5918
5919         return 0;
5920 }
5921
5922 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5923 __rte_weak int
5924 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5925 {
5926         return -1;
5927 }
5928
5929 __rte_weak uint16_t
5930 ixgbe_recv_pkts_vec(
5931         void __rte_unused *rx_queue,
5932         struct rte_mbuf __rte_unused **rx_pkts,
5933         uint16_t __rte_unused nb_pkts)
5934 {
5935         return 0;
5936 }
5937
5938 __rte_weak uint16_t
5939 ixgbe_recv_scattered_pkts_vec(
5940         void __rte_unused *rx_queue,
5941         struct rte_mbuf __rte_unused **rx_pkts,
5942         uint16_t __rte_unused nb_pkts)
5943 {
5944         return 0;
5945 }
5946
5947 __rte_weak int
5948 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5949 {
5950         return -1;
5951 }