net: add rte prefix to SCTP structure
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit mask to indicate which bits are required for building the TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |             \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
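/*
 * PKT_TX_OFFLOAD_MASK covers every TX offload flag defined by the mbuf API,
 * so XOR-ing it with the set supported by this PMD yields the flags that are
 * NOT supported.  ixgbe_prep_pkts() rejects any packet carrying one of them.
 */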
76
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 #ifdef RTE_IXGBE_INC_VECTOR
91 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
92                                     uint16_t nb_pkts);
93 #endif
94
95 /*********************************************************************
96  *
97  *  TX functions
98  *
99  **********************************************************************/
100
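/*
 * Overview of the transmit paths in this excerpt:
 *  - ixgbe_xmit_pkts_simple(): fast path for single-segment mbufs with no
 *    offloads, built on tx_xmit_pkts()/ixgbe_tx_fill_hw_ring().
 *  - ixgbe_xmit_pkts_vec(): wrapper around the vectorized burst routine,
 *    compiled only when RTE_IXGBE_INC_VECTOR is defined.
 *  - ixgbe_xmit_pkts(): full-featured path handling multi-segment mbufs and
 *    the offloads in IXGBE_TX_OFFLOAD_MASK via context descriptors.
 * The choice between them is made by ixgbe_set_tx_function(), later in this
 * file (not shown in this excerpt).
 */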
101 /*
102  * Check for descriptors with their DD bit set and free mbufs.
103  * Return the total number of buffers freed.
104  */
105 static __rte_always_inline int
106 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
107 {
108         struct ixgbe_tx_entry *txep;
109         uint32_t status;
110         int i, nb_free = 0;
111         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
112
113         /* check DD bit on threshold descriptor */
114         status = txq->tx_ring[txq->tx_next_dd].wb.status;
115         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
116                 return 0;
117
118         /*
119          * first buffer to free from S/W ring is at index
120          * tx_next_dd - (tx_rs_thresh-1)
121          */
122         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
123
124         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
125                 /* free buffers one at a time */
126                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
127                 txep->mbuf = NULL;
128
129                 if (unlikely(m == NULL))
130                         continue;
131
132                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
133                     (nb_free > 0 && m->pool != free[0]->pool)) {
134                         rte_mempool_put_bulk(free[0]->pool,
135                                              (void **)free, nb_free);
136                         nb_free = 0;
137                 }
138
139                 free[nb_free++] = m;
140         }
141
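        /*
         * Flush whatever is left in the local free[] array: mbufs are
         * returned to their mempool in bulk, and the array is also flushed
         * above whenever it fills up or the pool changes between mbufs.
         */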
142         if (nb_free > 0)
143                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
144
145         /* buffers were freed, update counters */
146         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
147         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
148         if (txq->tx_next_dd >= txq->nb_tx_desc)
149                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
150
151         return txq->tx_rs_thresh;
152 }
153
154 /* Populate 4 descriptors with data from 4 mbufs */
155 static inline void
156 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
157 {
158         uint64_t buf_dma_addr;
159         uint32_t pkt_len;
160         int i;
161
162         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
163                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
164                 pkt_len = (*pkts)->data_len;
165
166                 /* write data to descriptor */
167                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
168
169                 txdp->read.cmd_type_len =
170                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
171
172                 txdp->read.olinfo_status =
173                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
174
175                 rte_prefetch0(&(*pkts)->pool);
176         }
177 }
178
179 /* Populate 1 descriptor with data from 1 mbuf */
180 static inline void
181 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
182 {
183         uint64_t buf_dma_addr;
184         uint32_t pkt_len;
185
186         buf_dma_addr = rte_mbuf_data_iova(*pkts);
187         pkt_len = (*pkts)->data_len;
188
189         /* write data to descriptor */
190         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
191         txdp->read.cmd_type_len =
192                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
193         txdp->read.olinfo_status =
194                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
195         rte_prefetch0(&(*pkts)->pool);
196 }
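/*
 * Note: tx4() and tx1() only write plain data descriptors and never emit a
 * context descriptor, so they are used only by the simple TX path, which
 * assumes single-segment mbufs and no additional offloads.
 */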
197
198 /*
199  * Fill H/W descriptor ring with mbuf data.
200  * Copy mbuf pointers to the S/W ring.
201  */
202 static inline void
203 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
204                       uint16_t nb_pkts)
205 {
206         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
207         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
208         const int N_PER_LOOP = 4;
209         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
210         int mainpart, leftover;
211         int i, j;
212
213         /*
214          * Process most of the packets in chunks of N pkts.  Any
215          * leftover packets will get processed one at a time.
216          */
217         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
218         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
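        /*
         * Worked example: with nb_pkts = 10 and N_PER_LOOP = 4,
         * mainpart = 8 (two passes of tx4) and leftover = 2
         * (two calls of tx1).
         */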
219         for (i = 0; i < mainpart; i += N_PER_LOOP) {
220                 /* Copy N mbuf pointers to the S/W ring */
221                 for (j = 0; j < N_PER_LOOP; ++j) {
222                         (txep + i + j)->mbuf = *(pkts + i + j);
223                 }
224                 tx4(txdp + i, pkts + i);
225         }
226
227         if (unlikely(leftover > 0)) {
228                 for (i = 0; i < leftover; ++i) {
229                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
230                         tx1(txdp + mainpart + i, pkts + mainpart + i);
231                 }
232         }
233 }
234
235 static inline uint16_t
236 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
237              uint16_t nb_pkts)
238 {
239         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
240         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
241         uint16_t n = 0;
242
243         /*
244          * Begin scanning the H/W ring for done descriptors when the
245          * number of available descriptors drops below tx_free_thresh.  For
246          * each done descriptor, free the associated buffer.
247          */
248         if (txq->nb_tx_free < txq->tx_free_thresh)
249                 ixgbe_tx_free_bufs(txq);
250
251         /* Only use descriptors that are available */
252         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
253         if (unlikely(nb_pkts == 0))
254                 return 0;
255
256         /* Use exactly nb_pkts descriptors */
257         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
258
259         /*
260          * At this point, we know there are enough descriptors in the
261          * ring to transmit all the packets.  This assumes that each
262          * mbuf contains a single segment, and that no new offloads
263          * are expected, which would require a new context descriptor.
264          */
265
266         /*
267          * See if we're going to wrap-around. If so, handle the top
268          * of the descriptor ring first, then do the bottom.  If not,
269          * the processing looks just like the "bottom" part anyway...
270          */
271         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
272                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
273                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
274
275                 /*
276                  * We know that the last descriptor in the ring will need to
277                  * have its RS bit set because tx_rs_thresh has to be
278                  * a divisor of the ring size
279                  */
280                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
281                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
282                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
283
284                 txq->tx_tail = 0;
285         }
286
287         /* Fill H/W descriptor ring with mbuf data */
288         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
289         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
290
291         /*
292          * Determine if RS bit should be set
293          * This is what we actually want:
294          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
295          * but instead of subtracting 1 and doing >=, we can just do
296          * greater than without subtracting.
297          */
298         if (txq->tx_tail > txq->tx_next_rs) {
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
302                                                 txq->tx_rs_thresh);
303                 if (txq->tx_next_rs >= txq->nb_tx_desc)
304                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
305         }
306
307         /*
308          * Check for wrap-around. This would only happen if we used
309          * up to the last descriptor in the ring, no more, no less.
310          */
311         if (txq->tx_tail >= txq->nb_tx_desc)
312                 txq->tx_tail = 0;
313
314         /* update tail pointer */
315         rte_wmb();
316         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
317
318         return nb_pkts;
319 }
320
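/*
 * The wrapper below never passes more than RTE_PMD_IXGBE_TX_MAX_BURST
 * packets to tx_xmit_pkts() in a single call; larger bursts are split into
 * chunks, and the loop stops early when a chunk cannot be transmitted in
 * full (i.e. the ring ran out of free descriptors).
 */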
321 uint16_t
322 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
323                        uint16_t nb_pkts)
324 {
325         uint16_t nb_tx;
326
327         /* If the whole burst fits in TX_MAX_BURST pkts, transmit it in one call */
328         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
329                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
330
331         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
332         nb_tx = 0;
333         while (nb_pkts) {
334                 uint16_t ret, n;
335
336                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
337                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
338                 nb_tx = (uint16_t)(nb_tx + ret);
339                 nb_pkts = (uint16_t)(nb_pkts - ret);
340                 if (ret < n)
341                         break;
342         }
343
344         return nb_tx;
345 }
346
347 #ifdef RTE_IXGBE_INC_VECTOR
348 static uint16_t
349 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
350                     uint16_t nb_pkts)
351 {
352         uint16_t nb_tx = 0;
353         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
354
355         while (nb_pkts) {
356                 uint16_t ret, num;
357
358                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
359                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
360                                                  num);
361                 nb_tx += ret;
362                 nb_pkts -= ret;
363                 if (ret < num)
364                         break;
365         }
366
367         return nb_tx;
368 }
369 #endif
370
371 static inline void
372 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
373                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
374                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
375                 __rte_unused uint64_t *mdata)
376 {
377         uint32_t type_tucmd_mlhl;
378         uint32_t mss_l4len_idx = 0;
379         uint32_t ctx_idx;
380         uint32_t vlan_macip_lens;
381         union ixgbe_tx_offload tx_offload_mask;
382         uint32_t seqnum_seed = 0;
383
384         ctx_idx = txq->ctx_curr;
385         tx_offload_mask.data[0] = 0;
386         tx_offload_mask.data[1] = 0;
387         type_tucmd_mlhl = 0;
388
389         /* Specify which HW CTX to upload. */
390         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
391
392         if (ol_flags & PKT_TX_VLAN_PKT) {
393                 tx_offload_mask.vlan_tci |= ~0;
394         }
395
396         /* check if TCP segmentation is required for this packet */
397         if (ol_flags & PKT_TX_TCP_SEG) {
398                 /* implies IP cksum in IPv4 */
399                 if (ol_flags & PKT_TX_IP_CKSUM)
400                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
401                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
402                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
403                 else
404                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
405                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
406                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
407
408                 tx_offload_mask.l2_len |= ~0;
409                 tx_offload_mask.l3_len |= ~0;
410                 tx_offload_mask.l4_len |= ~0;
411                 tx_offload_mask.tso_segsz |= ~0;
412                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
413                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
414         } else { /* no TSO, check if hardware checksum is needed */
415                 if (ol_flags & PKT_TX_IP_CKSUM) {
416                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
417                         tx_offload_mask.l2_len |= ~0;
418                         tx_offload_mask.l3_len |= ~0;
419                 }
420
421                 switch (ol_flags & PKT_TX_L4_MASK) {
422                 case PKT_TX_UDP_CKSUM:
423                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
424                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
425                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
426                         tx_offload_mask.l2_len |= ~0;
427                         tx_offload_mask.l3_len |= ~0;
428                         break;
429                 case PKT_TX_TCP_CKSUM:
430                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
431                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
432                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
433                         tx_offload_mask.l2_len |= ~0;
434                         tx_offload_mask.l3_len |= ~0;
435                         break;
436                 case PKT_TX_SCTP_CKSUM:
437                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
438                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
439                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
440                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
441                         tx_offload_mask.l2_len |= ~0;
442                         tx_offload_mask.l3_len |= ~0;
443                         break;
444                 default:
445                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
446                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
447                         break;
448                 }
449         }
450
451         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
452                 tx_offload_mask.outer_l2_len |= ~0;
453                 tx_offload_mask.outer_l3_len |= ~0;
454                 tx_offload_mask.l2_len |= ~0;
455                 seqnum_seed |= tx_offload.outer_l3_len
456                                << IXGBE_ADVTXD_OUTER_IPLEN;
457                 seqnum_seed |= tx_offload.l2_len
458                                << IXGBE_ADVTXD_TUNNEL_LEN;
459         }
460 #ifdef RTE_LIBRTE_SECURITY
461         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
462                 union ixgbe_crypto_tx_desc_md *md =
463                                 (union ixgbe_crypto_tx_desc_md *)mdata;
464                 seqnum_seed |=
465                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
466                 type_tucmd_mlhl |= md->enc ?
467                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
468                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
469                 type_tucmd_mlhl |=
470                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
471                 tx_offload_mask.sa_idx |= ~0;
472                 tx_offload_mask.sec_pad_len |= ~0;
473         }
474 #endif
475
476         txq->ctx_cache[ctx_idx].flags = ol_flags;
477         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
478                 tx_offload_mask.data[0] & tx_offload.data[0];
479         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
480                 tx_offload_mask.data[1] & tx_offload.data[1];
481         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
482
483         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
484         vlan_macip_lens = tx_offload.l3_len;
485         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
486                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         else
489                 vlan_macip_lens |= (tx_offload.l2_len <<
490                                     IXGBE_ADVTXD_MACLEN_SHIFT);
491         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
492         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
493         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
494         ctx_txd->seqnum_seed     = seqnum_seed;
495 }
496
497 /*
498  * Check which hardware context can be used. Use the existing match
499  * or create a new context descriptor.
500  */
501 static inline uint32_t
502 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
503                    union ixgbe_tx_offload tx_offload)
504 {
505         /* Check for a match with the currently used context */
506         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
509                      & tx_offload.data[0])) &&
510                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
511                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
512                      & tx_offload.data[1]))))
513                 return txq->ctx_curr;
514
515         /* Otherwise, check for a match with the other cached context */
516         txq->ctx_curr ^= 1;
517         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
520                      & tx_offload.data[0])) &&
521                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
522                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
523                      & tx_offload.data[1]))))
524                 return txq->ctx_curr;
525
526         /* No match in either cached context: a new one must be built */
527         return IXGBE_CTX_NUM;
528 }
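/*
 * The queue keeps a two-entry context cache (ctx_cache[], indexed by
 * ctx_curr) mirroring the two hardware context slots.  A return value of
 * IXGBE_CTX_NUM means neither cached context matches, so the caller must
 * write a new context descriptor into the slot ctx_curr now points to.
 */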
529
530 static inline uint32_t
531 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
532 {
533         uint32_t tmp = 0;
534
535         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
536                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
537         if (ol_flags & PKT_TX_IP_CKSUM)
538                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
539         if (ol_flags & PKT_TX_TCP_SEG)
540                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
541         return tmp;
542 }
543
544 static inline uint32_t
545 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
546 {
547         uint32_t cmdtype = 0;
548
549         if (ol_flags & PKT_TX_VLAN_PKT)
550                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
551         if (ol_flags & PKT_TX_TCP_SEG)
552                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
553         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
554                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
555         if (ol_flags & PKT_TX_MACSEC)
556                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
557         return cmdtype;
558 }
559
560 /* Default RS bit threshold values */
561 #ifndef DEFAULT_TX_RS_THRESH
562 #define DEFAULT_TX_RS_THRESH   32
563 #endif
564 #ifndef DEFAULT_TX_FREE_THRESH
565 #define DEFAULT_TX_FREE_THRESH 32
566 #endif
567
568 /* Reset transmit descriptors after they have been used */
569 static inline int
570 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
571 {
572         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
573         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
574         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
575         uint16_t nb_tx_desc = txq->nb_tx_desc;
576         uint16_t desc_to_clean_to;
577         uint16_t nb_tx_to_clean;
578         uint32_t status;
579
580         /* Determine the last descriptor needing to be cleaned */
581         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
582         if (desc_to_clean_to >= nb_tx_desc)
583                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
584
585         /* Check to make sure the last descriptor to clean is done */
586         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
587         status = txr[desc_to_clean_to].wb.status;
588         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
589                 PMD_TX_FREE_LOG(DEBUG,
590                                 "TX descriptor %4u is not done "
591                                 "(port=%d queue=%d)",
592                                 desc_to_clean_to,
593                                 txq->port_id, txq->queue_id);
594                 /* Failed to clean any descriptors, better luck next time */
595                 return -(1);
596         }
597
598         /* Figure out how many descriptors will be cleaned */
599         if (last_desc_cleaned > desc_to_clean_to)
600                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
601                                                         desc_to_clean_to);
602         else
603                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
604                                                 last_desc_cleaned);
605
606         PMD_TX_FREE_LOG(DEBUG,
607                         "Cleaning %4u TX descriptors: %4u to %4u "
608                         "(port=%d queue=%d)",
609                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
610                         txq->port_id, txq->queue_id);
611
612         /*
613          * The last descriptor to clean is done, so that means all the
614          * descriptors from the last descriptor that was cleaned
615          * up to the last descriptor with the RS bit set
616          * are done. Only reset the threshold descriptor.
617          */
618         txr[desc_to_clean_to].wb.status = 0;
619
620         /* Update the txq to reflect the last descriptor that was cleaned */
621         txq->last_desc_cleaned = desc_to_clean_to;
622         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
623
624         /* No Error */
625         return 0;
626 }
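/*
 * Cleanup happens in tx_rs_thresh-sized chunks: if the descriptor at the
 * next RS boundary does not yet have its DD bit set, nothing is reclaimed
 * and -1 is returned, and ixgbe_xmit_pkts() stops the current burst once no
 * more descriptors can be freed.
 */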
627
628 uint16_t
629 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
630                 uint16_t nb_pkts)
631 {
632         struct ixgbe_tx_queue *txq;
633         struct ixgbe_tx_entry *sw_ring;
634         struct ixgbe_tx_entry *txe, *txn;
635         volatile union ixgbe_adv_tx_desc *txr;
636         volatile union ixgbe_adv_tx_desc *txd, *txp;
637         struct rte_mbuf     *tx_pkt;
638         struct rte_mbuf     *m_seg;
639         uint64_t buf_dma_addr;
640         uint32_t olinfo_status;
641         uint32_t cmd_type_len;
642         uint32_t pkt_len;
643         uint16_t slen;
644         uint64_t ol_flags;
645         uint16_t tx_id;
646         uint16_t tx_last;
647         uint16_t nb_tx;
648         uint16_t nb_used;
649         uint64_t tx_ol_req;
650         uint32_t ctx = 0;
651         uint32_t new_ctx;
652         union ixgbe_tx_offload tx_offload;
653 #ifdef RTE_LIBRTE_SECURITY
654         uint8_t use_ipsec;
655 #endif
656
657         tx_offload.data[0] = 0;
658         tx_offload.data[1] = 0;
659         txq = tx_queue;
660         sw_ring = txq->sw_ring;
661         txr     = txq->tx_ring;
662         tx_id   = txq->tx_tail;
663         txe = &sw_ring[tx_id];
664         txp = NULL;
665
666         /* Determine if the descriptor ring needs to be cleaned. */
667         if (txq->nb_tx_free < txq->tx_free_thresh)
668                 ixgbe_xmit_cleanup(txq);
669
670         rte_prefetch0(&txe->mbuf->pool);
671
672         /* TX loop */
673         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
674                 new_ctx = 0;
675                 tx_pkt = *tx_pkts++;
676                 pkt_len = tx_pkt->pkt_len;
677
678                 /*
679                  * Determine how many (if any) context descriptors
680                  * are needed for offload functionality.
681                  */
682                 ol_flags = tx_pkt->ol_flags;
683 #ifdef RTE_LIBRTE_SECURITY
684                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
685 #endif
686
687                 /* If hardware offload required */
688                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
689                 if (tx_ol_req) {
690                         tx_offload.l2_len = tx_pkt->l2_len;
691                         tx_offload.l3_len = tx_pkt->l3_len;
692                         tx_offload.l4_len = tx_pkt->l4_len;
693                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
694                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
695                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
696                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
697 #ifdef RTE_LIBRTE_SECURITY
698                         if (use_ipsec) {
699                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
700                                         (union ixgbe_crypto_tx_desc_md *)
701                                                         &tx_pkt->udata64;
702                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
703                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
704                         }
705 #endif
706
707                         /* Check whether a new context descriptor must be built or an existing one can be reused. */
708                         ctx = what_advctx_update(txq, tx_ol_req,
709                                 tx_offload);
710                         /* Only allocate a context descriptor if required */
711                         new_ctx = (ctx == IXGBE_CTX_NUM);
712                         ctx = txq->ctx_curr;
713                 }
714
715                 /*
716                  * Keep track of how many descriptors are used this loop
717                  * This will always be the number of segments + the number of
718                  * Context descriptors required to transmit the packet
719                  */
720                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
721
722                 if (txp != NULL &&
723                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
724                         /* set RS on the previous packet in the burst */
725                         txp->read.cmd_type_len |=
726                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
727
728                 /*
729                  * The number of descriptors that must be allocated for a
730                  * packet is the number of segments of that packet, plus 1
731                  * Context Descriptor for the hardware offload, if any.
732                  * Determine the last TX descriptor to allocate in the TX ring
733                  * for the packet, starting from the current position (tx_id)
734                  * in the ring.
735                  */
736                 tx_last = (uint16_t) (tx_id + nb_used - 1);
737
738                 /* Circular ring */
739                 if (tx_last >= txq->nb_tx_desc)
740                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
741
742                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
743                            " tx_first=%u tx_last=%u",
744                            (unsigned) txq->port_id,
745                            (unsigned) txq->queue_id,
746                            (unsigned) pkt_len,
747                            (unsigned) tx_id,
748                            (unsigned) tx_last);
749
750                 /*
751                  * Make sure there are enough TX descriptors available to
752                  * transmit the entire packet.
753                  * nb_used better be less than or equal to txq->tx_rs_thresh
754                  */
755                 if (nb_used > txq->nb_tx_free) {
756                         PMD_TX_FREE_LOG(DEBUG,
757                                         "Not enough free TX descriptors "
758                                         "nb_used=%4u nb_free=%4u "
759                                         "(port=%d queue=%d)",
760                                         nb_used, txq->nb_tx_free,
761                                         txq->port_id, txq->queue_id);
762
763                         if (ixgbe_xmit_cleanup(txq) != 0) {
764                                 /* Could not clean any descriptors */
765                                 if (nb_tx == 0)
766                                         return 0;
767                                 goto end_of_tx;
768                         }
769
770                         /* nb_used better be <= txq->tx_rs_thresh */
771                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
772                                 PMD_TX_FREE_LOG(DEBUG,
773                                         "The number of descriptors needed to "
774                                         "transmit the packet exceeds the "
775                                         "RS bit threshold. This will impact "
776                                         "performance. "
777                                         "nb_used=%4u nb_free=%4u "
778                                         "tx_rs_thresh=%4u. "
779                                         "(port=%d queue=%d)",
780                                         nb_used, txq->nb_tx_free,
781                                         txq->tx_rs_thresh,
782                                         txq->port_id, txq->queue_id);
783                                 /*
784                                  * Loop here until there are enough TX
785                                  * descriptors or until the ring cannot be
786                                  * cleaned.
787                                  */
788                                 while (nb_used > txq->nb_tx_free) {
789                                         if (ixgbe_xmit_cleanup(txq) != 0) {
790                                                 /*
791                                                  * Could not clean any
792                                                  * descriptors
793                                                  */
794                                                 if (nb_tx == 0)
795                                                         return 0;
796                                                 goto end_of_tx;
797                                         }
798                                 }
799                         }
800                 }
801
802                 /*
803                  * By now there are enough free TX descriptors to transmit
804                  * the packet.
805                  */
806
807                 /*
808                  * Set common flags of all TX Data Descriptors.
809                  *
810                  * The following bits must be set in all Data Descriptors:
811                  *   - IXGBE_ADVTXD_DTYP_DATA
812                  *   - IXGBE_ADVTXD_DCMD_DEXT
813                  *
814                  * The following bits must be set in the first Data Descriptor
815                  * and are ignored in the other ones:
816                  *   - IXGBE_ADVTXD_DCMD_IFCS
817                  *   - IXGBE_ADVTXD_MAC_1588
818                  *   - IXGBE_ADVTXD_DCMD_VLE
819                  *
820                  * The following bits must only be set in the last Data
821                  * Descriptor:
822                  *   - IXGBE_TXD_CMD_EOP
823                  *
824                  * The following bits can be set in any Data Descriptor, but
825                  * are only set in the last Data Descriptor:
826                  *   - IXGBE_TXD_CMD_RS
827                  */
828                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
829                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
830
831 #ifdef RTE_LIBRTE_IEEE1588
832                 if (ol_flags & PKT_TX_IEEE1588_TMST)
833                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
834 #endif
835
836                 olinfo_status = 0;
837                 if (tx_ol_req) {
838
839                         if (ol_flags & PKT_TX_TCP_SEG) {
840                                 /* when TSO is on, paylen in descriptor is the
841                                  * not the packet len but the tcp payload len */
842                                 pkt_len -= (tx_offload.l2_len +
843                                         tx_offload.l3_len + tx_offload.l4_len);
844                         }
845
846                         /*
847                          * Setup the TX Advanced Context Descriptor if required
848                          */
849                         if (new_ctx) {
850                                 volatile struct ixgbe_adv_tx_context_desc *
851                                     ctx_txd;
852
853                                 ctx_txd = (volatile struct
854                                     ixgbe_adv_tx_context_desc *)
855                                     &txr[tx_id];
856
857                                 txn = &sw_ring[txe->next_id];
858                                 rte_prefetch0(&txn->mbuf->pool);
859
860                                 if (txe->mbuf != NULL) {
861                                         rte_pktmbuf_free_seg(txe->mbuf);
862                                         txe->mbuf = NULL;
863                                 }
864
865                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
866                                         tx_offload, &tx_pkt->udata64);
867
868                                 txe->last_id = tx_last;
869                                 tx_id = txe->next_id;
870                                 txe = txn;
871                         }
872
873                         /*
874                          * Set up the TX Advanced Data Descriptor.
875                          * This path is taken whether the context
876                          * descriptor is newly built or reused.
877                          */
878                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
879                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
880                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
881                 }
882
883                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
884 #ifdef RTE_LIBRTE_SECURITY
885                 if (use_ipsec)
886                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
887 #endif
888
889                 m_seg = tx_pkt;
890                 do {
891                         txd = &txr[tx_id];
892                         txn = &sw_ring[txe->next_id];
893                         rte_prefetch0(&txn->mbuf->pool);
894
895                         if (txe->mbuf != NULL)
896                                 rte_pktmbuf_free_seg(txe->mbuf);
897                         txe->mbuf = m_seg;
898
899                         /*
900                          * Set up Transmit Data Descriptor.
901                          */
902                         slen = m_seg->data_len;
903                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
904                         txd->read.buffer_addr =
905                                 rte_cpu_to_le_64(buf_dma_addr);
906                         txd->read.cmd_type_len =
907                                 rte_cpu_to_le_32(cmd_type_len | slen);
908                         txd->read.olinfo_status =
909                                 rte_cpu_to_le_32(olinfo_status);
910                         txe->last_id = tx_last;
911                         tx_id = txe->next_id;
912                         txe = txn;
913                         m_seg = m_seg->next;
914                 } while (m_seg != NULL);
915
916                 /*
917                  * The last packet data descriptor needs End Of Packet (EOP)
918                  */
919                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
920                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
921                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
922
923                 /* Set RS bit only on threshold packets' last descriptor */
924                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
925                         PMD_TX_FREE_LOG(DEBUG,
926                                         "Setting RS bit on TXD id="
927                                         "%4u (port=%d queue=%d)",
928                                         tx_last, txq->port_id, txq->queue_id);
929
930                         cmd_type_len |= IXGBE_TXD_CMD_RS;
931
932                         /* Update txq RS bit counters */
933                         txq->nb_tx_used = 0;
934                         txp = NULL;
935                 } else
936                         txp = txd;
937
938                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
939         }
940
941 end_of_tx:
942         /* set RS on last packet in the burst */
943         if (txp != NULL)
944                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
945
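        /*
         * The write barrier below makes sure all descriptor updates are
         * globally visible before the tail register write notifies the
         * hardware.
         */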
946         rte_wmb();
947
948         /*
949          * Set the Transmit Descriptor Tail (TDT)
950          */
951         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
952                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
953                    (unsigned) tx_id, (unsigned) nb_tx);
954         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
955         txq->tx_tail = tx_id;
956
957         return nb_tx;
958 }
959
960 /*********************************************************************
961  *
962  *  TX prep functions
963  *
964  **********************************************************************/
965 uint16_t
966 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
967 {
968         int i, ret;
969         uint64_t ol_flags;
970         struct rte_mbuf *m;
971         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
972
973         for (i = 0; i < nb_pkts; i++) {
974                 m = tx_pkts[i];
975                 ol_flags = m->ol_flags;
976
977                 /**
978                  * Check if packet meets requirements for number of segments
979                  *
980                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
981                  *       non-TSO
982                  */
983
984                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
985                         rte_errno = -EINVAL;
986                         return i;
987                 }
988
989                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
990                         rte_errno = -ENOTSUP;
991                         return i;
992                 }
993
994 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
995                 ret = rte_validate_tx_offload(m);
996                 if (ret != 0) {
997                         rte_errno = ret;
998                         return i;
999                 }
1000 #endif
1001                 ret = rte_net_intel_cksum_prepare(m);
1002                 if (ret != 0) {
1003                         rte_errno = ret;
1004                         return i;
1005                 }
1006         }
1007
1008         return i;
1009 }
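/*
 * ixgbe_prep_pkts() is the driver's tx_pkt_prepare callback: applications
 * run it through rte_eth_tx_prepare() before rte_eth_tx_burst() so that
 * unsupported offload requests are rejected early and checksum fields are
 * pre-filled as the hardware expects (rte_net_intel_cksum_prepare()).
 *
 * Caller-side usage sketch (handle_bad_packet() is hypothetical):
 *
 *     nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *     if (nb_prep < nb_pkts)
 *         handle_bad_packet(pkts[nb_prep], rte_errno);
 *     nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */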
1010
1011 /*********************************************************************
1012  *
1013  *  RX functions
1014  *
1015  **********************************************************************/
1016
1017 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1018 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1019 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1020 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1021 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1022 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1023 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1026 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1027 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1028 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1029 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1030 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1031 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1034 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1035 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1038 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1039 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1042 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1043 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1046 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1047 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1050
1051 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1074
1075 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1098
1099 /**
1100  * Use two different tables for normal packets and tunnel packets
1101  * to save space.
1102  */
1103 const uint32_t
1104         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1105         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1106         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1107                 RTE_PTYPE_L3_IPV4,
1108         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1109                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1110         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1112         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1114         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4_EXT,
1116         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1122         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV6,
1124         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1126         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1128         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1130         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6_EXT,
1132         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1138         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                 RTE_PTYPE_INNER_L3_IPV6,
1141         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1142                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1143                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1144         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1146                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1147         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1148                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1149                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1150         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1152                 RTE_PTYPE_INNER_L3_IPV6,
1153         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1154                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1155                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1156         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1158                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1159         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1160                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1161                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1162         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1164                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1165         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1166                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1167                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1168         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1169                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1170                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1171         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1172                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1173                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1174         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1175                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1176                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1177         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1178                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1179                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1180         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1181                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1183         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1184                 RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1187 };
1188
1189 const uint32_t
1190         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1191         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1192                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1193                 RTE_PTYPE_INNER_L2_ETHER,
1194         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1195                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1196                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1197         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1198                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1199                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1200         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1201                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1203         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1204                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1205                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1206         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1207                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1209         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1210                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1211                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1212         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1213                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1214                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1215                 RTE_PTYPE_INNER_L4_TCP,
1216         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1217                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1219                 RTE_PTYPE_INNER_L4_TCP,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1223         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1224                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1226                 RTE_PTYPE_INNER_L4_TCP,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1228                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1229                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1230                 RTE_PTYPE_INNER_L3_IPV4,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1232                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1234                 RTE_PTYPE_INNER_L4_UDP,
1235         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1236                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1238                 RTE_PTYPE_INNER_L4_UDP,
1239         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1240                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1242                 RTE_PTYPE_INNER_L4_SCTP,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1246         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1247                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1249                 RTE_PTYPE_INNER_L4_UDP,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1253                 RTE_PTYPE_INNER_L4_SCTP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1255                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1256                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1257                 RTE_PTYPE_INNER_L3_IPV4,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1259                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1260                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1261                 RTE_PTYPE_INNER_L4_SCTP,
1262         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1263                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1265                 RTE_PTYPE_INNER_L4_SCTP,
1266         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                 RTE_PTYPE_INNER_L4_TCP,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1273                 RTE_PTYPE_INNER_L4_UDP,
1274
1275         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1276                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1277                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1278         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1279                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1281                 RTE_PTYPE_INNER_L3_IPV4,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV6,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV4,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV4,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV4,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1319                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1320                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1321                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1323                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV4,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1347                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1348                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1349                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1351                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1366 };
1367
1368 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything is changed here. */
1369 static inline uint32_t
1370 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1371 {
1372
1373         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1374                 return RTE_PTYPE_UNKNOWN;
1375
1376         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1377
1378         /* For tunnel packets */
1379         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1380                 /* Remove the tunnel bit to save table space. */
1381                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1382                 return ptype_table_tn[pkt_info];
1383         }
1384
1385         /**
1386          * For x550, if the packet is not tunneled,
1387          * the tunnel type bit is expected to be 0,
1388          * so the 82599 mask can be reused.
1389          */
1390         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1391
1392         return ptype_table[pkt_info];
1393 }
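
/*
 * Illustrative note (not part of the original file): an application consuming
 * the packet_type produced by the lookup above can branch on the generic
 * RTE_PTYPE_*_MASK macros from rte_mbuf_ptype.h, e.g.
 *
 *     uint32_t pt = m->packet_type;
 *     if ((pt & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_VXLAN &&
 *         (pt & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP)
 *             handle_inner_tcp(m);    // handle_inner_tcp() is a made-up name
 *
 * This is only a usage sketch; on the Rx path the PMD just fills the field in.
 */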
1394
1395 static inline uint64_t
1396 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1397 {
1398         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1399                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1400                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1401                 PKT_RX_RSS_HASH, 0, 0, 0,
1402                 0, 0, 0,  PKT_RX_FDIR,
1403         };
1404 #ifdef RTE_LIBRTE_IEEE1588
1405         static uint64_t ip_pkt_etqf_map[8] = {
1406                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1407                 0, 0, 0, 0,
1408         };
1409
1410         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1411                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1412                                 ip_rss_types_map[pkt_info & 0XF];
1413         else
1414                 return ip_rss_types_map[pkt_info & 0XF];
1415 #else
1416         return ip_rss_types_map[pkt_info & 0XF];
1417 #endif
1418 }
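
/*
 * Added note (not in the original source): the low four bits of pkt_info
 * select an entry of ip_rss_types_map above, so only the RSS/flow types with
 * a non-zero entry report PKT_RX_RSS_HASH; index 0xF maps to PKT_RX_FDIR,
 * marking a flow-director match instead of an RSS hash.
 */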
1419
1420 static inline uint64_t
1421 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1422 {
1423         uint64_t pkt_flags;
1424
1425         /*
1426          * Check only whether a VLAN is present.
1427          * Do not check here whether the L3/L4 Rx checksum was done by the
1428          * NIC; that can be found in the rte_eth_rxmode.offloads flags.
1429          */
1430         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1431
1432 #ifdef RTE_LIBRTE_IEEE1588
1433         if (rx_status & IXGBE_RXD_STAT_TMST)
1434                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1435 #endif
1436         return pkt_flags;
1437 }
1438
1439 static inline uint64_t
1440 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1441 {
1442         uint64_t pkt_flags;
1443
1444         /*
1445          * Bit 31: IPE, IPv4 checksum error
1446          * Bit 30: L4I, L4 integrity error
1447          */
1448         static uint64_t error_to_pkt_flags_map[4] = {
1449                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1450                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1451                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1452                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1453         };
1454         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1455                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1456
1457         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1458             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1459                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1460         }
1461
1462 #ifdef RTE_LIBRTE_SECURITY
1463         if (rx_status & IXGBE_RXD_STAT_SECP) {
1464                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1465                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1466                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1467         }
1468 #endif
1469
1470         return pkt_flags;
1471 }
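
/*
 * Added note (not in the original source): after the shift by
 * IXGBE_RXDADV_ERR_CKSUM_BIT, bit 0 of the index is the L4 checksum error and
 * bit 1 the IPv4 checksum error, which is why the four map entries above read:
 *   index 0 -> IP good, L4 good
 *   index 1 -> IP good, L4 bad
 *   index 2 -> IP bad,  L4 good
 *   index 3 -> IP bad,  L4 bad
 */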
1472
1473 /*
1474  * LOOK_AHEAD defines how many desc statuses to check beyond the
1475  * current descriptor.
1476  * It must be a preprocessor #define for optimal performance.
1477  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1478  * function only works with LOOK_AHEAD=8.
1479  */
1480 #define LOOK_AHEAD 8
1481 #if (LOOK_AHEAD != 8)
1482 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1483 #endif
1484 static inline int
1485 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1486 {
1487         volatile union ixgbe_adv_rx_desc *rxdp;
1488         struct ixgbe_rx_entry *rxep;
1489         struct rte_mbuf *mb;
1490         uint16_t pkt_len;
1491         uint64_t pkt_flags;
1492         int nb_dd;
1493         uint32_t s[LOOK_AHEAD];
1494         uint32_t pkt_info[LOOK_AHEAD];
1495         int i, j, nb_rx = 0;
1496         uint32_t status;
1497         uint64_t vlan_flags = rxq->vlan_flags;
1498
1499         /* get references to current descriptor and S/W ring entry */
1500         rxdp = &rxq->rx_ring[rxq->rx_tail];
1501         rxep = &rxq->sw_ring[rxq->rx_tail];
1502
1503         status = rxdp->wb.upper.status_error;
1504         /* check to make sure there is at least 1 packet to receive */
1505         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1506                 return 0;
1507
1508         /*
1509          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1510          * reference packets that are ready to be received.
1511          */
1512         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1513              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1514                 /* Read desc statuses backwards to avoid race condition */
1515                 /* Read desc statuses; rte_smp_rmb() below orders them before the payload reads */
1516                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1517
1518                 rte_smp_rmb();
1519
1520                 /* Compute how many status bits were set */
1521                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1522                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1523                         ;
1524
1525                 for (j = 0; j < nb_dd; j++)
1526                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1527                                                        lo_dword.data);
1528
1529                 nb_rx += nb_dd;
1530
1531                 /* Translate descriptor info to mbuf format */
1532                 for (j = 0; j < nb_dd; ++j) {
1533                         mb = rxep[j].mbuf;
1534                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1535                                   rxq->crc_len;
1536                         mb->data_len = pkt_len;
1537                         mb->pkt_len = pkt_len;
1538                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1539
1540                         /* convert descriptor fields to rte mbuf flags */
1541                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1542                                 vlan_flags);
1543                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1544                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1545                                         ((uint16_t)pkt_info[j]);
1546                         mb->ol_flags = pkt_flags;
1547                         mb->packet_type =
1548                                 ixgbe_rxd_pkt_info_to_pkt_type
1549                                         (pkt_info[j], rxq->pkt_type_mask);
1550
1551                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1552                                 mb->hash.rss = rte_le_to_cpu_32(
1553                                     rxdp[j].wb.lower.hi_dword.rss);
1554                         else if (pkt_flags & PKT_RX_FDIR) {
1555                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1556                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1557                                     IXGBE_ATR_HASH_MASK;
1558                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1559                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1560                         }
1561                 }
1562
1563                 /* Move mbuf pointers from the S/W ring to the stage */
1564                 for (j = 0; j < LOOK_AHEAD; ++j) {
1565                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1566                 }
1567
1568                 /* stop if all requested packets could not be received */
1569                 if (nb_dd != LOOK_AHEAD)
1570                         break;
1571         }
1572
1573         /* clear software ring entries so we can cleanup correctly */
1574         for (i = 0; i < nb_rx; ++i) {
1575                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1576         }
1577
1578
1579         return nb_rx;
1580 }
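
/*
 * Added note (not in the original source): the scan above walks at most
 * RTE_PMD_IXGBE_RX_MAX_BURST descriptors in groups of LOOK_AHEAD (8), counting
 * consecutive DD bits per group and stopping at the first partially completed
 * group.  For instance, if 13 descriptors are ready, the first group yields
 * nb_dd = 8, the second nb_dd = 5, and the scan returns nb_rx = 13.  The
 * staged mbufs are then handed out by ixgbe_rx_fill_from_stage() below.
 */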
1581
1582 static inline int
1583 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1584 {
1585         volatile union ixgbe_adv_rx_desc *rxdp;
1586         struct ixgbe_rx_entry *rxep;
1587         struct rte_mbuf *mb;
1588         uint16_t alloc_idx;
1589         __le64 dma_addr;
1590         int diag, i;
1591
1592         /* allocate buffers in bulk directly into the S/W ring */
1593         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1594         rxep = &rxq->sw_ring[alloc_idx];
1595         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1596                                     rxq->rx_free_thresh);
1597         if (unlikely(diag != 0))
1598                 return -ENOMEM;
1599
1600         rxdp = &rxq->rx_ring[alloc_idx];
1601         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1602                 /* populate the static rte mbuf fields */
1603                 mb = rxep[i].mbuf;
1604                 if (reset_mbuf) {
1605                         mb->port = rxq->port_id;
1606                 }
1607
1608                 rte_mbuf_refcnt_set(mb, 1);
1609                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1610
1611                 /* populate the descriptors */
1612                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1613                 rxdp[i].read.hdr_addr = 0;
1614                 rxdp[i].read.pkt_addr = dma_addr;
1615         }
1616
1617         /* update state of internal queue structure */
1618         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1619         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1620                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1621
1622         /* no errors */
1623         return 0;
1624 }
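
/*
 * Added note (not in the original source): rx_free_trigger advances by
 * rx_free_thresh on every refill and wraps back to rx_free_thresh - 1 when it
 * reaches the end of the ring.  Assuming it starts at rx_free_thresh - 1 (as
 * set at queue reset), a ring of 128 descriptors with rx_free_thresh = 32
 * refills indexes 0-31, 32-63, 64-95 and 96-127 in turn, with the trigger
 * moving 31 -> 63 -> 95 -> 127 -> 31.
 */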
1625
1626 static inline uint16_t
1627 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1628                          uint16_t nb_pkts)
1629 {
1630         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1631         int i;
1632
1633         /* how many packets are ready to return? */
1634         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1635
1636         /* copy mbuf pointers to the application's packet list */
1637         for (i = 0; i < nb_pkts; ++i)
1638                 rx_pkts[i] = stage[i];
1639
1640         /* update internal queue state */
1641         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1642         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1643
1644         return nb_pkts;
1645 }
1646
1647 static inline uint16_t
1648 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1649              uint16_t nb_pkts)
1650 {
1651         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1652         uint16_t nb_rx = 0;
1653
1654         /* Any previously recv'd pkts will be returned from the Rx stage */
1655         if (rxq->rx_nb_avail)
1656                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1657
1658         /* Scan the H/W ring for packets to receive */
1659         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1660
1661         /* update internal queue state */
1662         rxq->rx_next_avail = 0;
1663         rxq->rx_nb_avail = nb_rx;
1664         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1665
1666         /* if required, allocate new buffers to replenish descriptors */
1667         if (rxq->rx_tail > rxq->rx_free_trigger) {
1668                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1669
1670                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1671                         int i, j;
1672
1673                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1674                                    "queue_id=%u", (unsigned) rxq->port_id,
1675                                    (unsigned) rxq->queue_id);
1676
1677                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1678                                 rxq->rx_free_thresh;
1679
1680                         /*
1681                          * Need to rewind any previous receives if we cannot
1682                          * allocate new buffers to replenish the old ones.
1683                          */
1684                         rxq->rx_nb_avail = 0;
1685                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1686                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1687                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1688
1689                         return 0;
1690                 }
1691
1692                 /* update tail pointer */
1693                 rte_wmb();
1694                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1695                                             cur_free_trigger);
1696         }
1697
1698         if (rxq->rx_tail >= rxq->nb_rx_desc)
1699                 rxq->rx_tail = 0;
1700
1701         /* received any packets this loop? */
1702         if (rxq->rx_nb_avail)
1703                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1704
1705         return 0;
1706 }
1707
1708 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1709 uint16_t
1710 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1711                            uint16_t nb_pkts)
1712 {
1713         uint16_t nb_rx;
1714
1715         if (unlikely(nb_pkts == 0))
1716                 return 0;
1717
1718         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1719                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1720
1721         /* request is relatively large, chunk it up */
1722         nb_rx = 0;
1723         while (nb_pkts) {
1724                 uint16_t ret, n;
1725
1726                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1727                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1728                 nb_rx = (uint16_t)(nb_rx + ret);
1729                 nb_pkts = (uint16_t)(nb_pkts - ret);
1730                 if (ret < n)
1731                         break;
1732         }
1733
1734         return nb_rx;
1735 }
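
/*
 * Added note (not in the original source): assuming the usual
 * RTE_PMD_IXGBE_RX_MAX_BURST of 32, a request for 100 packets is served as
 * chunks of 32, 32, 32 and 4; the loop stops early as soon as a chunk returns
 * fewer packets than requested, i.e. when no more completed descriptors are
 * available.
 */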
1736
1737 uint16_t
1738 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1739                 uint16_t nb_pkts)
1740 {
1741         struct ixgbe_rx_queue *rxq;
1742         volatile union ixgbe_adv_rx_desc *rx_ring;
1743         volatile union ixgbe_adv_rx_desc *rxdp;
1744         struct ixgbe_rx_entry *sw_ring;
1745         struct ixgbe_rx_entry *rxe;
1746         struct rte_mbuf *rxm;
1747         struct rte_mbuf *nmb;
1748         union ixgbe_adv_rx_desc rxd;
1749         uint64_t dma_addr;
1750         uint32_t staterr;
1751         uint32_t pkt_info;
1752         uint16_t pkt_len;
1753         uint16_t rx_id;
1754         uint16_t nb_rx;
1755         uint16_t nb_hold;
1756         uint64_t pkt_flags;
1757         uint64_t vlan_flags;
1758
1759         nb_rx = 0;
1760         nb_hold = 0;
1761         rxq = rx_queue;
1762         rx_id = rxq->rx_tail;
1763         rx_ring = rxq->rx_ring;
1764         sw_ring = rxq->sw_ring;
1765         vlan_flags = rxq->vlan_flags;
1766         while (nb_rx < nb_pkts) {
1767                 /*
1768                  * The order of operations here is important as the DD status
1769                  * bit must not be read after any other descriptor fields.
1770                  * rx_ring and rxdp point to volatile data, so the accesses
1771                  * cannot be reordered by the compiler. If they were not
1772                  * volatile, the accesses could be reordered, which could lead
1773                  * to using invalid descriptor fields when read from rxd.
1774                  */
1775                 rxdp = &rx_ring[rx_id];
1776                 staterr = rxdp->wb.upper.status_error;
1777                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1778                         break;
1779                 rxd = *rxdp;
1780
1781                 /*
1782                  * End of packet.
1783                  *
1784                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1785                  * is likely to be invalid and to be dropped by the various
1786                  * validation checks performed by the network stack.
1787                  *
1788                  * Allocate a new mbuf to replenish the RX ring descriptor.
1789                  * If the allocation fails:
1790                  *    - arrange for that RX descriptor to be the first one
1791                  *      being parsed the next time the receive function is
1792                  *      invoked [on the same queue].
1793                  *
1794                  *    - Stop parsing the RX ring and return immediately.
1795                  *
1796                  * This policy does not drop the packet received in the RX
1797                  * descriptor for which the allocation of a new mbuf failed.
1798                  * Thus, it allows that packet to be retrieved later, once
1799                  * mbufs have been freed in the meantime.
1800                  * As a side effect, holding RX descriptors instead of
1801                  * systematically giving them back to the NIC may lead to
1802                  * RX ring exhaustion situations.
1803                  * However, the NIC can gracefully prevent such situations
1804                  * from happening by sending specific "back-pressure" flow
1805                  * control frames to its peer(s).
1806                  */
1807                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1808                            "ext_err_stat=0x%08x pkt_len=%u",
1809                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1810                            (unsigned) rx_id, (unsigned) staterr,
1811                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1812
1813                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1814                 if (nmb == NULL) {
1815                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1816                                    "queue_id=%u", (unsigned) rxq->port_id,
1817                                    (unsigned) rxq->queue_id);
1818                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1819                         break;
1820                 }
1821
1822                 nb_hold++;
1823                 rxe = &sw_ring[rx_id];
1824                 rx_id++;
1825                 if (rx_id == rxq->nb_rx_desc)
1826                         rx_id = 0;
1827
1828                 /* Prefetch next mbuf while processing current one. */
1829                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1830
1831                 /*
1832                  * When the next RX descriptor is on a cache-line boundary,
1833                  * prefetch the next 4 RX descriptors and the next 8 pointers
1834                  * to mbufs.
1835                  */
1836                 if ((rx_id & 0x3) == 0) {
1837                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1838                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1839                 }
1840
1841                 rxm = rxe->mbuf;
1842                 rxe->mbuf = nmb;
1843                 dma_addr =
1844                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1845                 rxdp->read.hdr_addr = 0;
1846                 rxdp->read.pkt_addr = dma_addr;
1847
1848                 /*
1849                  * Initialize the returned mbuf.
1850                  * 1) setup generic mbuf fields:
1851                  *    - number of segments,
1852                  *    - next segment,
1853                  *    - packet length,
1854                  *    - RX port identifier.
1855                  * 2) integrate hardware offload data, if any:
1856                  *    - RSS flag & hash,
1857                  *    - IP checksum flag,
1858                  *    - VLAN TCI, if any,
1859                  *    - error flags.
1860                  */
1861                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1862                                       rxq->crc_len);
1863                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1864                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1865                 rxm->nb_segs = 1;
1866                 rxm->next = NULL;
1867                 rxm->pkt_len = pkt_len;
1868                 rxm->data_len = pkt_len;
1869                 rxm->port = rxq->port_id;
1870
1871                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1872                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1873                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1874
1875                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1876                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1877                 pkt_flags = pkt_flags |
1878                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1879                 rxm->ol_flags = pkt_flags;
1880                 rxm->packet_type =
1881                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1882                                                        rxq->pkt_type_mask);
1883
1884                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1885                         rxm->hash.rss = rte_le_to_cpu_32(
1886                                                 rxd.wb.lower.hi_dword.rss);
1887                 else if (pkt_flags & PKT_RX_FDIR) {
1888                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1889                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1890                                         IXGBE_ATR_HASH_MASK;
1891                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1892                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1893                 }
1894                 /*
1895                  * Store the mbuf address into the next entry of the array
1896                  * of returned packets.
1897                  */
1898                 rx_pkts[nb_rx++] = rxm;
1899         }
1900         rxq->rx_tail = rx_id;
1901
1902         /*
1903          * If the number of free RX descriptors is greater than the RX free
1904          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1905          * register.
1906          * Update the RDT with the value of the last processed RX descriptor
1907          * minus 1, to guarantee that the RDT register is never equal to the
1908          * RDH register, which creates a "full" ring situation from the
1909          * hardware point of view...
1910          */
1911         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1912         if (nb_hold > rxq->rx_free_thresh) {
1913                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1914                            "nb_hold=%u nb_rx=%u",
1915                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1916                            (unsigned) rx_id, (unsigned) nb_hold,
1917                            (unsigned) nb_rx);
1918                 rx_id = (uint16_t) ((rx_id == 0) ?
1919                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1920                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1921                 nb_hold = 0;
1922         }
1923         rxq->nb_rx_hold = nb_hold;
1924         return nb_rx;
1925 }
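
/*
 * Added note (not in the original source): nb_hold accumulates across calls in
 * rxq->nb_rx_hold, so the tail register is only written back once more than
 * rx_free_thresh descriptors have been handed to the application, and it is
 * written with rx_id - 1 so that RDT never becomes equal to RDH.
 */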
1926
1927 /**
1928  * Detect an RSC descriptor.
1929  */
1930 static inline uint32_t
1931 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1932 {
1933         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1934                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1935 }
1936
1937 /**
1938  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1939  *
1940  * Fill the following info in the HEAD buffer of the Rx cluster:
1941  *    - RX port identifier
1942  *    - hardware offload data, if any:
1943  *      - RSS flag & hash
1944  *      - IP checksum flag
1945  *      - VLAN TCI, if any
1946  *      - error flags
1947  * @head HEAD of the packet cluster
1948  * @desc HW descriptor to get data from
1949  * @rxq Pointer to the Rx queue
 * @staterr Status/error word read from the Rx descriptor
1950  */
1951 static inline void
1952 ixgbe_fill_cluster_head_buf(
1953         struct rte_mbuf *head,
1954         union ixgbe_adv_rx_desc *desc,
1955         struct ixgbe_rx_queue *rxq,
1956         uint32_t staterr)
1957 {
1958         uint32_t pkt_info;
1959         uint64_t pkt_flags;
1960
1961         head->port = rxq->port_id;
1962
1963         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1964          * set in the pkt_flags field.
1965          */
1966         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1967         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1968         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1969         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1970         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1971         head->ol_flags = pkt_flags;
1972         head->packet_type =
1973                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1974
1975         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1976                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1977         else if (pkt_flags & PKT_RX_FDIR) {
1978                 head->hash.fdir.hash =
1979                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1980                                                           & IXGBE_ATR_HASH_MASK;
1981                 head->hash.fdir.id =
1982                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1983         }
1984 }
1985
1986 /**
1987  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1988  *
1989  * @rx_queue Rx queue handle
1990  * @rx_pkts table of received packets
1991  * @nb_pkts size of rx_pkts table
1992  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1993  *
1994  * Handles the Rx HW ring completions when the RSC feature is configured. Uses
1995  * an additional ring of ixgbe_scattered_rx_entry's holding the relevant RSC info.
1996  *
1997  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1998  * 1) When non-EOP RSC completion arrives:
1999  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2000  *       segment's data length.
2001  *    b) Set the "next" pointer of the current segment to point to the segment
2002  *       at the NEXTP index.
2003  *    c) Pass the HEAD of the RSC aggregation cluster on to the next NEXTP
2004  *       entry in the sw_sc_ring.
2005  * 2) When EOP arrives we just update the cluster's total length and offload
2006  *    flags and deliver the cluster up to the upper layers. In our case - put it
2007  *    in the rx_pkts table.
2008  *
2009  * Returns the number of received packets/clusters (according to the "bulk
2010  * receive" interface).
2011  */
2012 static inline uint16_t
2013 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2014                     bool bulk_alloc)
2015 {
2016         struct ixgbe_rx_queue *rxq = rx_queue;
2017         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2018         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2019         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2020         uint16_t rx_id = rxq->rx_tail;
2021         uint16_t nb_rx = 0;
2022         uint16_t nb_hold = rxq->nb_rx_hold;
2023         uint16_t prev_id = rxq->rx_tail;
2024
2025         while (nb_rx < nb_pkts) {
2026                 bool eop;
2027                 struct ixgbe_rx_entry *rxe;
2028                 struct ixgbe_scattered_rx_entry *sc_entry;
2029                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2030                 struct ixgbe_rx_entry *next_rxe = NULL;
2031                 struct rte_mbuf *first_seg;
2032                 struct rte_mbuf *rxm;
2033                 struct rte_mbuf *nmb = NULL;
2034                 union ixgbe_adv_rx_desc rxd;
2035                 uint16_t data_len;
2036                 uint16_t next_id;
2037                 volatile union ixgbe_adv_rx_desc *rxdp;
2038                 uint32_t staterr;
2039
2040 next_desc:
2041                 /*
2042                  * The code in this whole file uses the volatile pointer to
2043                  * ensure the read ordering of the status and the rest of the
2044                  * descriptor fields (on the compiler level only!!!). This is so
2045                  * UGLY - why not just use a compiler barrier instead? DPDK
2046                  * even has the rte_compiler_barrier() for that.
2047                  *
2048                  * But most importantly this is just wrong because this doesn't
2049                  * ensure memory ordering in a general case at all. For
2050                  * instance, DPDK is supposed to work on Power CPUs where
2051                  * compiler barrier may just not be enough!
2052                  *
2053                  * I tried to write only this function properly to have a
2054                  * starting point (as a part of an LRO/RSC series) but the
2055                  * compiler cursed at me when I tried to cast away the
2056                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2057                  * keeping it the way it is for now.
2058                  *
2059                  * The code in this file is broken in so many other places and
2060                  * will just not work on a big endian CPU anyway therefore the
2061                  * lines below will have to be revisited together with the rest
2062                  * of the ixgbe PMD.
2063                  *
2064                  * TODO:
2065                  *    - Get rid of "volatile" and let the compiler do its job.
2066                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2067                  *      memory ordering below.
2068                  */
2069                 rxdp = &rx_ring[rx_id];
2070                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2071
2072                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2073                         break;
2074
2075                 rxd = *rxdp;
2076
2077                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2078                                   "staterr=0x%x data_len=%u",
2079                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2080                            rte_le_to_cpu_16(rxd.wb.upper.length));
2081
2082                 if (!bulk_alloc) {
2083                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2084                         if (nmb == NULL) {
2085                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2086                                                   "port_id=%u queue_id=%u",
2087                                            rxq->port_id, rxq->queue_id);
2088
2089                                 rte_eth_devices[rxq->port_id].data->
2090                                                         rx_mbuf_alloc_failed++;
2091                                 break;
2092                         }
2093                 } else if (nb_hold > rxq->rx_free_thresh) {
2094                         uint16_t next_rdt = rxq->rx_free_trigger;
2095
2096                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2097                                 rte_wmb();
2098                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2099                                                             next_rdt);
2100                                 nb_hold -= rxq->rx_free_thresh;
2101                         } else {
2102                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2103                                                   "port_id=%u queue_id=%u",
2104                                            rxq->port_id, rxq->queue_id);
2105
2106                                 rte_eth_devices[rxq->port_id].data->
2107                                                         rx_mbuf_alloc_failed++;
2108                                 break;
2109                         }
2110                 }
2111
2112                 nb_hold++;
2113                 rxe = &sw_ring[rx_id];
2114                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2115
2116                 next_id = rx_id + 1;
2117                 if (next_id == rxq->nb_rx_desc)
2118                         next_id = 0;
2119
2120                 /* Prefetch next mbuf while processing current one. */
2121                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2122
2123                 /*
2124                  * When the next RX descriptor is on a cache-line boundary,
2125                  * prefetch the next 4 RX descriptors and the next 4 pointers
2126                  * to mbufs.
2127                  */
2128                 if ((next_id & 0x3) == 0) {
2129                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2130                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2131                 }
2132
2133                 rxm = rxe->mbuf;
2134
2135                 if (!bulk_alloc) {
2136                         __le64 dma =
2137                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2138                         /*
2139                          * Update RX descriptor with the physical address of the
2140                          * new data buffer of the newly allocated mbuf.
2141                          */
2142                         rxe->mbuf = nmb;
2143
2144                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2145                         rxdp->read.hdr_addr = 0;
2146                         rxdp->read.pkt_addr = dma;
2147                 } else
2148                         rxe->mbuf = NULL;
2149
2150                 /*
2151                  * Set the data length of the current segment's mbuf.
2152                  */
2153                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2154                 rxm->data_len = data_len;
2155
2156                 if (!eop) {
2157                         uint16_t nextp_id;
2158                         /*
2159                          * Get next descriptor index:
2160                          *  - For RSC it's in the NEXTP field.
2161                          *  - For a scattered packet - it's just a following
2162                          *    descriptor.
2163                          */
2164                         if (ixgbe_rsc_count(&rxd))
2165                                 nextp_id =
2166                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2167                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2168                         else
2169                                 nextp_id = next_id;
2170
2171                         next_sc_entry = &sw_sc_ring[nextp_id];
2172                         next_rxe = &sw_ring[nextp_id];
2173                         rte_ixgbe_prefetch(next_rxe);
2174                 }
2175
2176                 sc_entry = &sw_sc_ring[rx_id];
2177                 first_seg = sc_entry->fbuf;
2178                 sc_entry->fbuf = NULL;
2179
2180                 /*
2181                  * If this is the first buffer of the received packet,
2182                  * set the pointer to the first mbuf of the packet and
2183                  * initialize its context.
2184                  * Otherwise, update the total length and the number of segments
2185                  * of the current scattered packet, and update the pointer to
2186                  * the last mbuf of the current packet.
2187                  */
2188                 if (first_seg == NULL) {
2189                         first_seg = rxm;
2190                         first_seg->pkt_len = data_len;
2191                         first_seg->nb_segs = 1;
2192                 } else {
2193                         first_seg->pkt_len += data_len;
2194                         first_seg->nb_segs++;
2195                 }
2196
2197                 prev_id = rx_id;
2198                 rx_id = next_id;
2199
2200                 /*
2201                  * If this is not the last buffer of the received packet, update
2202                  * the pointer to the first mbuf at the NEXTP entry in the
2203                  * sw_sc_ring and continue to parse the RX ring.
2204                  */
2205                 if (!eop && next_rxe) {
2206                         rxm->next = next_rxe->mbuf;
2207                         next_sc_entry->fbuf = first_seg;
2208                         goto next_desc;
2209                 }
2210
2211                 /* Initialize the first mbuf of the returned packet */
2212                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2213
2214                 /*
2215                  * Deal with the case when HW CRC strip is disabled.
2216                  * That can't happen when LRO is enabled, but it still can
2217                  * happen in scattered RX mode.
2218                  */
2219                 first_seg->pkt_len -= rxq->crc_len;
2220                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2221                         struct rte_mbuf *lp;
2222
2223                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2224                                 ;
2225
2226                         first_seg->nb_segs--;
2227                         lp->data_len -= rxq->crc_len - rxm->data_len;
2228                         lp->next = NULL;
2229                         rte_pktmbuf_free_seg(rxm);
2230                 } else
2231                         rxm->data_len -= rxq->crc_len;
2232
2233                 /* Prefetch data of first segment, if configured to do so. */
2234                 rte_packet_prefetch((char *)first_seg->buf_addr +
2235                         first_seg->data_off);
2236
2237                 /*
2238                  * Store the mbuf address into the next entry of the array
2239                  * of returned packets.
2240                  */
2241                 rx_pkts[nb_rx++] = first_seg;
2242         }
2243
2244         /*
2245          * Record index of the next RX descriptor to probe.
2246          */
2247         rxq->rx_tail = rx_id;
2248
2249         /*
2250          * If the number of free RX descriptors is greater than the RX free
2251          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2252          * register.
2253          * Update the RDT with the value of the last processed RX descriptor
2254          * minus 1, to guarantee that the RDT register is never equal to the
2255          * RDH register, which creates a "full" ring situation from the
2256          * hardware point of view...
2257          */
2258         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2259                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2260                            "nb_hold=%u nb_rx=%u",
2261                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2262
2263                 rte_wmb();
2264                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2265                 nb_hold = 0;
2266         }
2267
2268         rxq->nb_rx_hold = nb_hold;
2269         return nb_rx;
2270 }
2271
2272 uint16_t
2273 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2274                                  uint16_t nb_pkts)
2275 {
2276         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2277 }
2278
2279 uint16_t
2280 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2281                                uint16_t nb_pkts)
2282 {
2283         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2284 }
2285
2286 /*********************************************************************
2287  *
2288  *  Queue management functions
2289  *
2290  **********************************************************************/
2291
2292 static void __attribute__((cold))
2293 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2294 {
2295         unsigned i;
2296
2297         if (txq->sw_ring != NULL) {
2298                 for (i = 0; i < txq->nb_tx_desc; i++) {
2299                         if (txq->sw_ring[i].mbuf != NULL) {
2300                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2301                                 txq->sw_ring[i].mbuf = NULL;
2302                         }
2303                 }
2304         }
2305 }
2306
2307 static void __attribute__((cold))
2308 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2309 {
2310         if (txq != NULL &&
2311             txq->sw_ring != NULL)
2312                 rte_free(txq->sw_ring);
2313 }
2314
2315 static void __attribute__((cold))
2316 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2317 {
2318         if (txq != NULL && txq->ops != NULL) {
2319                 txq->ops->release_mbufs(txq);
2320                 txq->ops->free_swring(txq);
2321                 rte_free(txq);
2322         }
2323 }
2324
2325 void __attribute__((cold))
2326 ixgbe_dev_tx_queue_release(void *txq)
2327 {
2328         ixgbe_tx_queue_release(txq);
2329 }
2330
2331 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2332 static void __attribute__((cold))
2333 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2334 {
2335         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2336         struct ixgbe_tx_entry *txe = txq->sw_ring;
2337         uint16_t prev, i;
2338
2339         /* Zero out HW ring memory */
2340         for (i = 0; i < txq->nb_tx_desc; i++) {
2341                 txq->tx_ring[i] = zeroed_desc;
2342         }
2343
2344         /* Initialize SW ring entries */
2345         prev = (uint16_t) (txq->nb_tx_desc - 1);
2346         for (i = 0; i < txq->nb_tx_desc; i++) {
2347                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2348
2349                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2350                 txe[i].mbuf = NULL;
2351                 txe[i].last_id = i;
2352                 txe[prev].next_id = i;
2353                 prev = i;
2354         }
2355
2356         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2357         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2358
2359         txq->tx_tail = 0;
2360         txq->nb_tx_used = 0;
2361         /*
2362          * Always allow 1 descriptor to be un-allocated to avoid
2363          * a H/W race condition
2364          */
2365         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2366         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2367         txq->ctx_curr = 0;
2368         memset((void *)&txq->ctx_cache, 0,
2369                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2370 }
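
/*
 * Added note (not in the original source): the loop above links the SW ring
 * into a circular list through next_id: with nb_tx_desc = 4 it produces
 * txe[0].next_id = 1, txe[1].next_id = 2, txe[2].next_id = 3 and
 * txe[3].next_id = 0, while every last_id starts as the entry's own index.
 * Setting IXGBE_TXD_STAT_DD in each descriptor makes the whole ring look
 * already transmitted, so the first cleanup pass succeeds.
 */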
2371
2372 static const struct ixgbe_txq_ops def_txq_ops = {
2373         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2374         .free_swring = ixgbe_tx_free_swring,
2375         .reset = ixgbe_reset_tx_queue,
2376 };
2377
2378 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2379  * the queue parameters. Used in tx_queue_setup by the primary process and then
2380  * in dev_init by a secondary process when attaching to an existing ethdev.
2381  */
2382 void __attribute__((cold))
2383 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2384 {
2385         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2386         if ((txq->offloads == 0) &&
2387 #ifdef RTE_LIBRTE_SECURITY
2388                         !(txq->using_ipsec) &&
2389 #endif
2390                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2391                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2392                 dev->tx_pkt_prepare = NULL;
2393 #ifdef RTE_IXGBE_INC_VECTOR
2394                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2395                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2396                                         ixgbe_txq_vec_setup(txq) == 0)) {
2397                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2398                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2399                 } else
2400 #endif
2401                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2402         } else {
2403                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2404                 PMD_INIT_LOG(DEBUG,
2405                                 " - offloads = 0x%" PRIx64,
2406                                 txq->offloads);
2407                 PMD_INIT_LOG(DEBUG,
2408                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2409                                 (unsigned long)txq->tx_rs_thresh,
2410                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2411                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2412                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2413         }
2414 }
2415
2416 uint64_t
2417 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2418 {
2419         RTE_SET_USED(dev);
2420
2421         return 0;
2422 }
2423
2424 uint64_t
2425 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2426 {
2427         uint64_t tx_offload_capa;
2428         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2429
2430         tx_offload_capa =
2431                 DEV_TX_OFFLOAD_VLAN_INSERT |
2432                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2433                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2434                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2435                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2436                 DEV_TX_OFFLOAD_TCP_TSO     |
2437                 DEV_TX_OFFLOAD_MULTI_SEGS;
2438
2439         if (hw->mac.type == ixgbe_mac_82599EB ||
2440             hw->mac.type == ixgbe_mac_X540)
2441                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2442
2443         if (hw->mac.type == ixgbe_mac_X550 ||
2444             hw->mac.type == ixgbe_mac_X550EM_x ||
2445             hw->mac.type == ixgbe_mac_X550EM_a)
2446                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2447
2448 #ifdef RTE_LIBRTE_SECURITY
2449         if (dev->security_ctx)
2450                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2451 #endif
2452         return tx_offload_capa;
2453 }
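
/*
 * Illustrative usage sketch (not part of the original file): an application
 * typically requests a subset of the capabilities reported above through the
 * port configuration before setting up Tx queues, e.g.
 *
 *     struct rte_eth_conf conf = { 0 };
 *
 *     conf.txmode.offloads = DEV_TX_OFFLOAD_IPV4_CKSUM |
 *                            DEV_TX_OFFLOAD_TCP_CKSUM;
 *     ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * port_id, nb_rxq, nb_txq and ret are placeholders; per-queue offloads can
 * additionally be passed via rte_eth_txconf.offloads at queue setup time.
 */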
2454
2455 int __attribute__((cold))
2456 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2457                          uint16_t queue_idx,
2458                          uint16_t nb_desc,
2459                          unsigned int socket_id,
2460                          const struct rte_eth_txconf *tx_conf)
2461 {
2462         const struct rte_memzone *tz;
2463         struct ixgbe_tx_queue *txq;
2464         struct ixgbe_hw     *hw;
2465         uint16_t tx_rs_thresh, tx_free_thresh;
2466         uint64_t offloads;
2467
2468         PMD_INIT_FUNC_TRACE();
2469         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2470
2471         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2472
2473         /*
2474          * Validate number of transmit descriptors.
2475          * It must not exceed hardware maximum, and must be multiple
2476          * of IXGBE_ALIGN.
2477          */
2478         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2479                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2480                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2481                 return -EINVAL;
2482         }
2483
2484         /*
2485          * The following two parameters control the setting of the RS bit on
2486          * transmit descriptors.
2487          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2488          * descriptors have been used.
2489          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2490          * descriptors are used or if the number of descriptors required
2491          * to transmit a packet is greater than the number of free TX
2492          * descriptors.
2493          * The following constraints must be satisfied:
2494          *  tx_rs_thresh must be greater than 0.
2495          *  tx_rs_thresh must be less than the size of the ring minus 2.
2496          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2497          *  tx_rs_thresh must be a divisor of the ring size.
2498          *  tx_free_thresh must be greater than 0.
2499          *  tx_free_thresh must be less than the size of the ring minus 3.
2500          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2501          * One descriptor in the TX ring is used as a sentinel to avoid a
2502          * H/W race condition, hence the maximum threshold constraints.
2503          * When set to zero use default values.
2504          */
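        /*
         * Illustrative sketch (assumed values, not taken from this driver):
         * with a hypothetical 512-descriptor ring, tx_rs_thresh = 32 (a
         * divisor of 512, not above tx_free_thresh) and tx_free_thresh = 64
         * (below 512 - 3, with 32 + 64 <= 512) satisfy every constraint
         * above, with WTHRESH left at 0:
         *
         *   struct rte_eth_txconf txconf = {
         *           .tx_rs_thresh = 32,
         *           .tx_free_thresh = 64,
         *           .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
         *   };
         *   rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
         *
         * "port_id" is a placeholder; pthresh/hthresh are example values only.
         */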
2505         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2506                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2507         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2508         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2509                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2510         if (tx_conf->tx_rs_thresh > 0)
2511                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2512         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2513                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2514                              "exceed nb_desc. (tx_rs_thresh=%u "
2515                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2516                              (unsigned int)tx_rs_thresh,
2517                              (unsigned int)tx_free_thresh,
2518                              (unsigned int)nb_desc,
2519                              (int)dev->data->port_id,
2520                              (int)queue_idx);
2521                 return -(EINVAL);
2522         }
2523         if (tx_rs_thresh >= (nb_desc - 2)) {
2524                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2525                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2526                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2527                         (int)dev->data->port_id, (int)queue_idx);
2528                 return -(EINVAL);
2529         }
2530         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2531                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2532                         "(tx_rs_thresh=%u port=%d queue=%d)",
2533                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2534                         (int)dev->data->port_id, (int)queue_idx);
2535                 return -(EINVAL);
2536         }
2537         if (tx_free_thresh >= (nb_desc - 3)) {
2538                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2539                              "number of TX descriptors minus 3. "
2540                              "(tx_free_thresh=%u "
2541                              "port=%d queue=%d)",
2542                              (unsigned int)tx_free_thresh,
2543                              (int)dev->data->port_id, (int)queue_idx);
2544                 return -(EINVAL);
2545         }
2546         if (tx_rs_thresh > tx_free_thresh) {
2547                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2548                              "tx_free_thresh. (tx_free_thresh=%u "
2549                              "tx_rs_thresh=%u port=%d queue=%d)",
2550                              (unsigned int)tx_free_thresh,
2551                              (unsigned int)tx_rs_thresh,
2552                              (int)dev->data->port_id,
2553                              (int)queue_idx);
2554                 return -(EINVAL);
2555         }
2556         if ((nb_desc % tx_rs_thresh) != 0) {
2557                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2558                              "number of TX descriptors. (tx_rs_thresh=%u "
2559                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2560                              (int)dev->data->port_id, (int)queue_idx);
2561                 return -(EINVAL);
2562         }
2563
2564         /*
2565          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2566          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2567          * by the NIC and all descriptors are written back after the NIC
2568          * accumulates WTHRESH descriptors.
2569          */
2570         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2571                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2572                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2573                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2574                              (int)dev->data->port_id, (int)queue_idx);
2575                 return -(EINVAL);
2576         }
2577
2578         /* Free memory prior to re-allocation if needed... */
2579         if (dev->data->tx_queues[queue_idx] != NULL) {
2580                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2581                 dev->data->tx_queues[queue_idx] = NULL;
2582         }
2583
2584         /* First allocate the tx queue data structure */
2585         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2586                                  RTE_CACHE_LINE_SIZE, socket_id);
2587         if (txq == NULL)
2588                 return -ENOMEM;
2589
2590         /*
2591          * Allocate TX ring hardware descriptors. A memzone large enough to
2592          * handle the maximum ring size is allocated in order to allow for
2593          * resizing in later calls to the queue setup function.
2594          */
2595         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2596                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2597                         IXGBE_ALIGN, socket_id);
2598         if (tz == NULL) {
2599                 ixgbe_tx_queue_release(txq);
2600                 return -ENOMEM;
2601         }
2602
2603         txq->nb_tx_desc = nb_desc;
2604         txq->tx_rs_thresh = tx_rs_thresh;
2605         txq->tx_free_thresh = tx_free_thresh;
2606         txq->pthresh = tx_conf->tx_thresh.pthresh;
2607         txq->hthresh = tx_conf->tx_thresh.hthresh;
2608         txq->wthresh = tx_conf->tx_thresh.wthresh;
2609         txq->queue_id = queue_idx;
2610         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2611                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2612         txq->port_id = dev->data->port_id;
2613         txq->offloads = offloads;
2614         txq->ops = &def_txq_ops;
2615         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2616 #ifdef RTE_LIBRTE_SECURITY
2617         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2618                         DEV_TX_OFFLOAD_SECURITY);
2619 #endif
2620
2621         /*
2622          * Use VFTDT as the Tx tail register when a virtual function is detected
2623          */
2624         if (hw->mac.type == ixgbe_mac_82599_vf ||
2625             hw->mac.type == ixgbe_mac_X540_vf ||
2626             hw->mac.type == ixgbe_mac_X550_vf ||
2627             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2628             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2629                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2630         else
2631                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2632
2633         txq->tx_ring_phys_addr = tz->iova;
2634         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2635
2636         /* Allocate software ring */
2637         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2638                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2639                                 RTE_CACHE_LINE_SIZE, socket_id);
2640         if (txq->sw_ring == NULL) {
2641                 ixgbe_tx_queue_release(txq);
2642                 return -ENOMEM;
2643         }
2644         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2645                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2646
2647         /* set up vector or scalar TX function as appropriate */
2648         ixgbe_set_tx_function(dev, txq);
2649
2650         txq->ops->reset(txq);
2651
2652         dev->data->tx_queues[queue_idx] = txq;
2653
2654
2655         return 0;
2656 }
2657
2658 /**
2659  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2660  *
2661  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2662  * in the sw_rsc_ring is not set to NULL but rather points to the next
2663  * mbuf of this RSC aggregation (that has not been completed yet and still
2664  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2665  * just free the first "nb_segs" segments of the cluster explicitly by calling
2666  * rte_pktmbuf_free_seg() on each of them.
2667  *
2668  * @m scattered cluster head
2669  */
2670 static void __attribute__((cold))
2671 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2672 {
2673         uint16_t i, nb_segs = m->nb_segs;
2674         struct rte_mbuf *next_seg;
2675
2676         for (i = 0; i < nb_segs; i++) {
2677                 next_seg = m->next;
2678                 rte_pktmbuf_free_seg(m);
2679                 m = next_seg;
2680         }
2681 }
2682
2683 static void __attribute__((cold))
2684 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2685 {
2686         unsigned i;
2687
2688 #ifdef RTE_IXGBE_INC_VECTOR
2689         /* SSE Vector driver has a different way of releasing mbufs. */
2690         if (rxq->rx_using_sse) {
2691                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2692                 return;
2693         }
2694 #endif
2695
2696         if (rxq->sw_ring != NULL) {
2697                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2698                         if (rxq->sw_ring[i].mbuf != NULL) {
2699                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2700                                 rxq->sw_ring[i].mbuf = NULL;
2701                         }
2702                 }
2703                 if (rxq->rx_nb_avail) {
2704                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2705                                 struct rte_mbuf *mb;
2706
2707                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2708                                 rte_pktmbuf_free_seg(mb);
2709                         }
2710                         rxq->rx_nb_avail = 0;
2711                 }
2712         }
2713
2714         if (rxq->sw_sc_ring)
2715                 for (i = 0; i < rxq->nb_rx_desc; i++)
2716                         if (rxq->sw_sc_ring[i].fbuf) {
2717                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2718                                 rxq->sw_sc_ring[i].fbuf = NULL;
2719                         }
2720 }
2721
2722 static void __attribute__((cold))
2723 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2724 {
2725         if (rxq != NULL) {
2726                 ixgbe_rx_queue_release_mbufs(rxq);
2727                 rte_free(rxq->sw_ring);
2728                 rte_free(rxq->sw_sc_ring);
2729                 rte_free(rxq);
2730         }
2731 }
2732
2733 void __attribute__((cold))
2734 ixgbe_dev_rx_queue_release(void *rxq)
2735 {
2736         ixgbe_rx_queue_release(rxq);
2737 }
2738
2739 /*
2740  * Check if Rx Burst Bulk Alloc function can be used.
2741  * Return
2742  *        0: the preconditions are satisfied and the bulk allocation function
2743  *           can be used.
2744  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2745  *           function must be used.
2746  */
2747 static inline int __attribute__((cold))
2748 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2749 {
2750         int ret = 0;
2751
2752         /*
2753          * Make sure the following pre-conditions are satisfied:
2754          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2755          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2756          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2757          * Scattered packets are not supported.  This should be checked
2758          * outside of this function.
2759          */
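        /*
         * Worked example (assumed numbers): with nb_rx_desc = 512 and
         * RTE_PMD_IXGBE_RX_MAX_BURST = 32, rx_free_thresh values of 32, 64,
         * 128 or 256 pass all three checks below, while 16 (too small) or
         * 48 (not a divisor of 512) do not.
         */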
2760         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2761                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2762                              "rxq->rx_free_thresh=%d, "
2763                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2764                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2765                 ret = -EINVAL;
2766         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2767                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2768                              "rxq->rx_free_thresh=%d, "
2769                              "rxq->nb_rx_desc=%d",
2770                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2771                 ret = -EINVAL;
2772         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2773                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2774                              "rxq->nb_rx_desc=%d, "
2775                              "rxq->rx_free_thresh=%d",
2776                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2777                 ret = -EINVAL;
2778         }
2779
2780         return ret;
2781 }
2782
2783 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2784 static void __attribute__((cold))
2785 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2786 {
2787         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2788         unsigned i;
2789         uint16_t len = rxq->nb_rx_desc;
2790
2791         /*
2792          * By default, the Rx queue setup function allocates enough memory for
2793          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2794          * extra memory at the end of the descriptor ring to be zero'd out.
2795          */
2796         if (adapter->rx_bulk_alloc_allowed)
2797                 /* zero out extra memory */
2798                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2799
2800         /*
2801          * Zero out HW ring memory. Zero out extra memory at the end of
2802          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2803          * reads extra memory as zeros.
2804          */
2805         for (i = 0; i < len; i++) {
2806                 rxq->rx_ring[i] = zeroed_desc;
2807         }
2808
2809         /*
2810          * initialize extra software ring entries. Space for these extra
2811          * entries is always allocated
2812          */
2813         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2814         for (i = rxq->nb_rx_desc; i < len; ++i) {
2815                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2816         }
2817
2818         rxq->rx_nb_avail = 0;
2819         rxq->rx_next_avail = 0;
2820         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2821         rxq->rx_tail = 0;
2822         rxq->nb_rx_hold = 0;
2823         rxq->pkt_first_seg = NULL;
2824         rxq->pkt_last_seg = NULL;
2825
2826 #ifdef RTE_IXGBE_INC_VECTOR
2827         rxq->rxrearm_start = 0;
2828         rxq->rxrearm_nb = 0;
2829 #endif
2830 }
2831
2832 static int
2833 ixgbe_is_vf(struct rte_eth_dev *dev)
2834 {
2835         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2836
2837         switch (hw->mac.type) {
2838         case ixgbe_mac_82599_vf:
2839         case ixgbe_mac_X540_vf:
2840         case ixgbe_mac_X550_vf:
2841         case ixgbe_mac_X550EM_x_vf:
2842         case ixgbe_mac_X550EM_a_vf:
2843                 return 1;
2844         default:
2845                 return 0;
2846         }
2847 }
2848
2849 uint64_t
2850 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2851 {
2852         uint64_t offloads = 0;
2853         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2854
2855         if (hw->mac.type != ixgbe_mac_82598EB)
2856                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2857
2858         return offloads;
2859 }
2860
2861 uint64_t
2862 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2863 {
2864         uint64_t offloads;
2865         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2866
2867         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2868                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2869                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2870                    DEV_RX_OFFLOAD_KEEP_CRC    |
2871                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2872                    DEV_RX_OFFLOAD_VLAN_FILTER |
2873                    DEV_RX_OFFLOAD_SCATTER;
2874
2875         if (hw->mac.type == ixgbe_mac_82598EB)
2876                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2877
2878         if (ixgbe_is_vf(dev) == 0)
2879                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2880
2881         /*
2882          * RSC is only supported by the 82599, x540 and x550 PF devices in
2883          * non-SR-IOV mode.
2884          */
2885         if ((hw->mac.type == ixgbe_mac_82599EB ||
2886              hw->mac.type == ixgbe_mac_X540 ||
2887              hw->mac.type == ixgbe_mac_X550) &&
2888             !RTE_ETH_DEV_SRIOV(dev).active)
2889                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2890
2891         if (hw->mac.type == ixgbe_mac_82599EB ||
2892             hw->mac.type == ixgbe_mac_X540)
2893                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
2894
2895         if (hw->mac.type == ixgbe_mac_X550 ||
2896             hw->mac.type == ixgbe_mac_X550EM_x ||
2897             hw->mac.type == ixgbe_mac_X550EM_a)
2898                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
2899
2900 #ifdef RTE_LIBRTE_SECURITY
2901         if (dev->security_ctx)
2902                 offloads |= DEV_RX_OFFLOAD_SECURITY;
2903 #endif
2904
2905         return offloads;
2906 }
2907
2908 int __attribute__((cold))
2909 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2910                          uint16_t queue_idx,
2911                          uint16_t nb_desc,
2912                          unsigned int socket_id,
2913                          const struct rte_eth_rxconf *rx_conf,
2914                          struct rte_mempool *mp)
2915 {
2916         const struct rte_memzone *rz;
2917         struct ixgbe_rx_queue *rxq;
2918         struct ixgbe_hw     *hw;
2919         uint16_t len;
2920         struct ixgbe_adapter *adapter =
2921                 (struct ixgbe_adapter *)dev->data->dev_private;
2922         uint64_t offloads;
2923
2924         PMD_INIT_FUNC_TRACE();
2925         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2926
2927         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
2928
2929         /*
2930          * Validate number of receive descriptors.
2931          * It must not exceed hardware maximum, and must be multiple
2932          * of IXGBE_ALIGN.
2933          */
2934         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2935                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2936                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2937                 return -EINVAL;
2938         }
2939
2940         /* Free memory prior to re-allocation if needed... */
2941         if (dev->data->rx_queues[queue_idx] != NULL) {
2942                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2943                 dev->data->rx_queues[queue_idx] = NULL;
2944         }
2945
2946         /* First allocate the rx queue data structure */
2947         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2948                                  RTE_CACHE_LINE_SIZE, socket_id);
2949         if (rxq == NULL)
2950                 return -ENOMEM;
2951         rxq->mb_pool = mp;
2952         rxq->nb_rx_desc = nb_desc;
2953         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2954         rxq->queue_id = queue_idx;
2955         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2956                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2957         rxq->port_id = dev->data->port_id;
2958         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2959                 rxq->crc_len = RTE_ETHER_CRC_LEN;
2960         else
2961                 rxq->crc_len = 0;
2962         rxq->drop_en = rx_conf->rx_drop_en;
2963         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2964         rxq->offloads = offloads;
2965
2966         /*
2967          * The packet type in the RX descriptor differs between NIC families:
2968          * some bits are used by x550 but reserved on other NICs,
2969          * so set a per-family packet type mask.
2970          */
2971         if (hw->mac.type == ixgbe_mac_X550 ||
2972             hw->mac.type == ixgbe_mac_X550EM_x ||
2973             hw->mac.type == ixgbe_mac_X550EM_a ||
2974             hw->mac.type == ixgbe_mac_X550_vf ||
2975             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2976             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2977                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2978         else
2979                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2980
2981         /*
2982          * Allocate RX ring hardware descriptors. A memzone large enough to
2983          * handle the maximum ring size is allocated in order to allow for
2984          * resizing in later calls to the queue setup function.
2985          */
2986         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2987                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2988         if (rz == NULL) {
2989                 ixgbe_rx_queue_release(rxq);
2990                 return -ENOMEM;
2991         }
2992
2993         /*
2994          * Zero init all the descriptors in the ring.
2995          */
2996         memset(rz->addr, 0, RX_RING_SZ);
2997
2998         /*
2999          * Use VFRDT/VFRDH as the Rx tail/head registers for a virtual function
3000          */
3001         if (hw->mac.type == ixgbe_mac_82599_vf ||
3002             hw->mac.type == ixgbe_mac_X540_vf ||
3003             hw->mac.type == ixgbe_mac_X550_vf ||
3004             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3005             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3006                 rxq->rdt_reg_addr =
3007                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3008                 rxq->rdh_reg_addr =
3009                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3010         } else {
3011                 rxq->rdt_reg_addr =
3012                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3013                 rxq->rdh_reg_addr =
3014                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3015         }
3016
3017         rxq->rx_ring_phys_addr = rz->iova;
3018         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3019
3020         /*
3021          * Certain constraints must be met in order to use the bulk buffer
3022          * allocation Rx burst function. If any Rx queue doesn't meet them,
3023          * the feature is disabled for the whole port.
3024          */
3025         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3026                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3027                                     "preconditions - canceling the feature for "
3028                                     "the whole port[%d]",
3029                              rxq->queue_id, rxq->port_id);
3030                 adapter->rx_bulk_alloc_allowed = false;
3031         }
3032
3033         /*
3034          * Allocate software ring. Allow for space at the end of the
3035          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3036          * function does not access an invalid memory region.
3037          */
3038         len = nb_desc;
3039         if (adapter->rx_bulk_alloc_allowed)
3040                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3041
3042         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3043                                           sizeof(struct ixgbe_rx_entry) * len,
3044                                           RTE_CACHE_LINE_SIZE, socket_id);
3045         if (!rxq->sw_ring) {
3046                 ixgbe_rx_queue_release(rxq);
3047                 return -ENOMEM;
3048         }
3049
3050         /*
3051          * Always allocate even if it's not going to be needed in order to
3052          * simplify the code.
3053          *
3054          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3055          * be requested in ixgbe_dev_rx_init(), which is called later from
3056          * dev_start() flow.
3057          */
3058         rxq->sw_sc_ring =
3059                 rte_zmalloc_socket("rxq->sw_sc_ring",
3060                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3061                                    RTE_CACHE_LINE_SIZE, socket_id);
3062         if (!rxq->sw_sc_ring) {
3063                 ixgbe_rx_queue_release(rxq);
3064                 return -ENOMEM;
3065         }
3066
3067         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3068                             "dma_addr=0x%"PRIx64,
3069                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3070                      rxq->rx_ring_phys_addr);
3071
3072         if (!rte_is_power_of_2(nb_desc)) {
3073                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3074                                     "preconditions - canceling the feature for "
3075                                     "the whole port[%d]",
3076                              rxq->queue_id, rxq->port_id);
3077                 adapter->rx_vec_allowed = false;
3078         } else
3079                 ixgbe_rxq_vec_setup(rxq);
3080
3081         dev->data->rx_queues[queue_idx] = rxq;
3082
3083         ixgbe_reset_rx_queue(adapter, rxq);
3084
3085         return 0;
3086 }
3087
3088 uint32_t
3089 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3090 {
3091 #define IXGBE_RXQ_SCAN_INTERVAL 4
3092         volatile union ixgbe_adv_rx_desc *rxdp;
3093         struct ixgbe_rx_queue *rxq;
3094         uint32_t desc = 0;
3095
3096         rxq = dev->data->rx_queues[rx_queue_id];
3097         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3098
3099         while ((desc < rxq->nb_rx_desc) &&
3100                 (rxdp->wb.upper.status_error &
3101                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3102                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3103                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3104                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3105                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3106                                 desc - rxq->nb_rx_desc]);
3107         }
3108
3109         return desc;
3110 }
3111
3112 int
3113 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3114 {
3115         volatile union ixgbe_adv_rx_desc *rxdp;
3116         struct ixgbe_rx_queue *rxq = rx_queue;
3117         uint32_t desc;
3118
3119         if (unlikely(offset >= rxq->nb_rx_desc))
3120                 return 0;
3121         desc = rxq->rx_tail + offset;
3122         if (desc >= rxq->nb_rx_desc)
3123                 desc -= rxq->nb_rx_desc;
3124
3125         rxdp = &rxq->rx_ring[desc];
3126         return !!(rxdp->wb.upper.status_error &
3127                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3128 }
3129
3130 int
3131 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3132 {
3133         struct ixgbe_rx_queue *rxq = rx_queue;
3134         volatile uint32_t *status;
3135         uint32_t nb_hold, desc;
3136
3137         if (unlikely(offset >= rxq->nb_rx_desc))
3138                 return -EINVAL;
3139
3140 #ifdef RTE_IXGBE_INC_VECTOR
3141         if (rxq->rx_using_sse)
3142                 nb_hold = rxq->rxrearm_nb;
3143         else
3144 #endif
3145                 nb_hold = rxq->nb_rx_hold;
3146         if (offset >= rxq->nb_rx_desc - nb_hold)
3147                 return RTE_ETH_RX_DESC_UNAVAIL;
3148
3149         desc = rxq->rx_tail + offset;
3150         if (desc >= rxq->nb_rx_desc)
3151                 desc -= rxq->nb_rx_desc;
3152
3153         status = &rxq->rx_ring[desc].wb.upper.status_error;
3154         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3155                 return RTE_ETH_RX_DESC_DONE;
3156
3157         return RTE_ETH_RX_DESC_AVAIL;
3158 }
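/*
 * Usage sketch (illustrative, not part of the driver): applications reach the
 * handler above through the generic ethdev wrapper. "port_id", "queue_id",
 * "pkts" and "BURST_SZ" are placeholders:
 *
 *   if (rte_eth_rx_descriptor_status(port_id, queue_id, 0) ==
 *                   RTE_ETH_RX_DESC_DONE)
 *           nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SZ);
 */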
3159
3160 int
3161 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3162 {
3163         struct ixgbe_tx_queue *txq = tx_queue;
3164         volatile uint32_t *status;
3165         uint32_t desc;
3166
3167         if (unlikely(offset >= txq->nb_tx_desc))
3168                 return -EINVAL;
3169
3170         desc = txq->tx_tail + offset;
3171         /* go to next desc that has the RS bit */
3172         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3173                 txq->tx_rs_thresh;
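        /*
         * Worked example (assumed values): with tx_rs_thresh = 32 and
         * tx_tail + offset = 70, the expression above rounds 70 up to 96,
         * i.e. the next descriptor whose RS bit requested a DD write-back;
         * only such descriptors report a meaningful completion status.
         */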
3174         if (desc >= txq->nb_tx_desc) {
3175                 desc -= txq->nb_tx_desc;
3176                 if (desc >= txq->nb_tx_desc)
3177                         desc -= txq->nb_tx_desc;
3178         }
3179
3180         status = &txq->tx_ring[desc].wb.status;
3181         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3182                 return RTE_ETH_TX_DESC_DONE;
3183
3184         return RTE_ETH_TX_DESC_FULL;
3185 }
3186
3187 /*
3188  * Set up link loopback for X540/X550 mode Tx->Rx.
3189  */
3190 static inline void __attribute__((cold))
3191 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3192 {
3193         uint32_t macc;
3194         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3195
3196         PMD_INIT_FUNC_TRACE();
3197
3198         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3199                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3200         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3201
3202         if (enable) {
3203                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3204                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3205                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3206                 macc |= IXGBE_MACC_FLU;
3207         } else {
3208                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3209                 macc &= ~IXGBE_MACC_FLU;
3210         }
3211
3212         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3213                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3214
3215         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3216 }
3217
3218 void __attribute__((cold))
3219 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3220 {
3221         unsigned i;
3222         struct ixgbe_adapter *adapter =
3223                 (struct ixgbe_adapter *)dev->data->dev_private;
3224         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3225
3226         PMD_INIT_FUNC_TRACE();
3227
3228         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3229                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3230
3231                 if (txq != NULL) {
3232                         txq->ops->release_mbufs(txq);
3233                         txq->ops->reset(txq);
3234                 }
3235         }
3236
3237         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3238                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3239
3240                 if (rxq != NULL) {
3241                         ixgbe_rx_queue_release_mbufs(rxq);
3242                         ixgbe_reset_rx_queue(adapter, rxq);
3243                 }
3244         }
3245         /* If loopback mode was enabled, reconfigure the link accordingly */
3246         if (dev->data->dev_conf.lpbk_mode != 0) {
3247                 if (hw->mac.type == ixgbe_mac_X540 ||
3248                      hw->mac.type == ixgbe_mac_X550 ||
3249                      hw->mac.type == ixgbe_mac_X550EM_x ||
3250                      hw->mac.type == ixgbe_mac_X550EM_a)
3251                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3252         }
3253 }
3254
3255 void
3256 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3257 {
3258         unsigned i;
3259
3260         PMD_INIT_FUNC_TRACE();
3261
3262         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3263                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3264                 dev->data->rx_queues[i] = NULL;
3265         }
3266         dev->data->nb_rx_queues = 0;
3267
3268         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3269                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3270                 dev->data->tx_queues[i] = NULL;
3271         }
3272         dev->data->nb_tx_queues = 0;
3273 }
3274
3275 /*********************************************************************
3276  *
3277  *  Device RX/TX init functions
3278  *
3279  **********************************************************************/
3280
3281 /**
3282  * Receive Side Scaling (RSS)
3283  * See section 7.1.2.8 in the following document:
3284  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3285  *
3286  * Principles:
3287  * The source and destination IP addresses of the IP header and the source
3288  * and destination ports of TCP/UDP headers, if any, of received packets are
3289  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3290  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3291  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3292  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3293  * received packets.
3294  * The following output is supplied in the RX write-back descriptor:
3295  *     - 32-bit result of the Microsoft RSS hash function,
3296  *     - 4-bit RSS type field.
3297  */
3298
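/*
 * Worked example (illustrative values): for a packet whose RSS hash is
 * 0x12345678, the seven LSBs are 0x78 & 0x7F = 120, so RETA entry 120 selects
 * the destination Rx queue; the full 32-bit hash is also reported in the
 * write-back descriptor and ends up in mbuf->hash.rss.
 */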
3299 /*
3300  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3301  * Used as the default key.
3302  */
3303 static uint8_t rss_intel_key[40] = {
3304         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3305         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3306         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3307         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3308         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3309 };
3310
3311 static void
3312 ixgbe_rss_disable(struct rte_eth_dev *dev)
3313 {
3314         struct ixgbe_hw *hw;
3315         uint32_t mrqc;
3316         uint32_t mrqc_reg;
3317
3318         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3319         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3320         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3321         mrqc &= ~IXGBE_MRQC_RSSEN;
3322         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3323 }
3324
3325 static void
3326 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3327 {
3328         uint8_t  *hash_key;
3329         uint32_t mrqc;
3330         uint32_t rss_key;
3331         uint64_t rss_hf;
3332         uint16_t i;
3333         uint32_t mrqc_reg;
3334         uint32_t rssrk_reg;
3335
3336         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3337         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3338
3339         hash_key = rss_conf->rss_key;
3340         if (hash_key != NULL) {
3341                 /* Fill in RSS hash key */
3342                 for (i = 0; i < 10; i++) {
3343                         rss_key  = hash_key[(i * 4)];
3344                         rss_key |= hash_key[(i * 4) + 1] << 8;
3345                         rss_key |= hash_key[(i * 4) + 2] << 16;
3346                         rss_key |= hash_key[(i * 4) + 3] << 24;
3347                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3348                 }
3349         }
3350
3351         /* Set configured hashing protocols in MRQC register */
3352         rss_hf = rss_conf->rss_hf;
3353         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3354         if (rss_hf & ETH_RSS_IPV4)
3355                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3356         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3357                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3358         if (rss_hf & ETH_RSS_IPV6)
3359                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3360         if (rss_hf & ETH_RSS_IPV6_EX)
3361                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3362         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3363                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3364         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3365                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3366         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3367                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3368         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3369                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3370         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3371                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3372         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3373 }
3374
3375 int
3376 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3377                           struct rte_eth_rss_conf *rss_conf)
3378 {
3379         struct ixgbe_hw *hw;
3380         uint32_t mrqc;
3381         uint64_t rss_hf;
3382         uint32_t mrqc_reg;
3383
3384         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3385
3386         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3387                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3388                         "NIC.");
3389                 return -ENOTSUP;
3390         }
3391         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3392
3393         /*
3394          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3395          *     "RSS enabling cannot be done dynamically while it must be
3396          *      preceded by a software reset"
3397          * Before changing anything, first check that the update RSS operation
3398          * does not attempt to disable RSS, if RSS was enabled at
3399          * initialization time, or does not attempt to enable RSS, if RSS was
3400          * disabled at initialization time.
3401          */
3402         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3403         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3404         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3405                 if (rss_hf != 0) /* Enable RSS */
3406                         return -(EINVAL);
3407                 return 0; /* Nothing to do */
3408         }
3409         /* RSS enabled */
3410         if (rss_hf == 0) /* Disable RSS */
3411                 return -(EINVAL);
3412         ixgbe_hw_rss_hash_set(hw, rss_conf);
3413         return 0;
3414 }
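/*
 * Usage sketch (illustrative, not part of the driver): an application updates
 * the key and hash protocols through the generic ethdev API, which lands in
 * the handler above. "port_id" and "my_key" (a 40-byte array) are
 * placeholders:
 *
 *   struct rte_eth_rss_conf conf = {
 *           .rss_key = my_key,
 *           .rss_key_len = 40,
 *           .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *   };
 *   ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 */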
3415
3416 int
3417 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3418                             struct rte_eth_rss_conf *rss_conf)
3419 {
3420         struct ixgbe_hw *hw;
3421         uint8_t *hash_key;
3422         uint32_t mrqc;
3423         uint32_t rss_key;
3424         uint64_t rss_hf;
3425         uint16_t i;
3426         uint32_t mrqc_reg;
3427         uint32_t rssrk_reg;
3428
3429         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3430         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3431         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3432         hash_key = rss_conf->rss_key;
3433         if (hash_key != NULL) {
3434                 /* Return RSS hash key */
3435                 for (i = 0; i < 10; i++) {
3436                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3437                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3438                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3439                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3440                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3441                 }
3442         }
3443
3444         /* Get RSS functions configured in MRQC register */
3445         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3446         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3447                 rss_conf->rss_hf = 0;
3448                 return 0;
3449         }
3450         rss_hf = 0;
3451         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3452                 rss_hf |= ETH_RSS_IPV4;
3453         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3454                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3455         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3456                 rss_hf |= ETH_RSS_IPV6;
3457         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3458                 rss_hf |= ETH_RSS_IPV6_EX;
3459         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3460                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3461         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3462                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3463         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3464                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3465         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3466                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3467         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3468                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3469         rss_conf->rss_hf = rss_hf;
3470         return 0;
3471 }
3472
3473 static void
3474 ixgbe_rss_configure(struct rte_eth_dev *dev)
3475 {
3476         struct rte_eth_rss_conf rss_conf;
3477         struct ixgbe_adapter *adapter;
3478         struct ixgbe_hw *hw;
3479         uint32_t reta;
3480         uint16_t i;
3481         uint16_t j;
3482         uint16_t sp_reta_size;
3483         uint32_t reta_reg;
3484
3485         PMD_INIT_FUNC_TRACE();
3486         adapter = (struct ixgbe_adapter *)dev->data->dev_private;
3487         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3488
3489         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3490
3491         /*
3492          * Fill in redirection table
3493          * The byte-swap is needed because NIC registers are in
3494          * little-endian order.
3495          */
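        /*
         * Worked example (assumed setup): with 4 Rx queues the loop below
         * cycles j through 0,1,2,3,0,1,... and packs four consecutive
         * entries into each 32-bit RETA register, so the first register is
         * written as rte_bswap32(0x00010203).
         */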
3496         if (adapter->rss_reta_updated == 0) {
3497                 reta = 0;
3498                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3499                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3500
3501                         if (j == dev->data->nb_rx_queues)
3502                                 j = 0;
3503                         reta = (reta << 8) | j;
3504                         if ((i & 3) == 3)
3505                                 IXGBE_WRITE_REG(hw, reta_reg,
3506                                                 rte_bswap32(reta));
3507                 }
3508         }
3509
3510         /*
3511          * Configure the RSS key and the RSS protocols used to compute
3512          * the RSS hash of input packets.
3513          */
3514         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3515         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3516                 ixgbe_rss_disable(dev);
3517                 return;
3518         }
3519         if (rss_conf.rss_key == NULL)
3520                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3521         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3522 }
3523
3524 #define NUM_VFTA_REGISTERS 128
3525 #define NIC_RX_BUFFER_SIZE 0x200
3526 #define X550_RX_BUFFER_SIZE 0x180
3527
3528 static void
3529 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3530 {
3531         struct rte_eth_vmdq_dcb_conf *cfg;
3532         struct ixgbe_hw *hw;
3533         enum rte_eth_nb_pools num_pools;
3534         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3535         uint16_t pbsize;
3536         uint8_t nb_tcs; /* number of traffic classes */
3537         int i;
3538
3539         PMD_INIT_FUNC_TRACE();
3540         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3541         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3542         num_pools = cfg->nb_queue_pools;
3543         /* Check we have a valid number of pools */
3544         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3545                 ixgbe_rss_disable(dev);
3546                 return;
3547         }
3548         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3549         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3550
3551         /*
3552          * RXPBSIZE
3553          * split rx buffer up into sections, each for 1 traffic class
3554          */
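        /*
         * Worked example (assuming the RXPBSIZE field counts KB): on
         * non-X550 parts the 0x200 (512 KB) packet buffer split across
         * 8 traffic classes gives pbsize = 0x40, i.e. 64 KB per TC.
         */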
3555         switch (hw->mac.type) {
3556         case ixgbe_mac_X550:
3557         case ixgbe_mac_X550EM_x:
3558         case ixgbe_mac_X550EM_a:
3559                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3560                 break;
3561         default:
3562                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3563                 break;
3564         }
3565         for (i = 0; i < nb_tcs; i++) {
3566                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3567
3568                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3569                 /* clear 10 bits. */
3570                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3571                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3572         }
3573         /* zero alloc all unused TCs */
3574         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3575                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3576
3577                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3578                 /* clear 10 bits. */
3579                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3580         }
3581
3582         /* MRQC: enable vmdq and dcb */
3583         mrqc = (num_pools == ETH_16_POOLS) ?
3584                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3585         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3586
3587         /* PFVTCTL: turn on virtualisation and set the default pool */
3588         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3589         if (cfg->enable_default_pool) {
3590                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3591         } else {
3592                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3593         }
3594
3595         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3596
3597         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3598         queue_mapping = 0;
3599         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3600                 /*
3601                  * mapping is done with 3 bits per priority,
3602                  * so shift by i*3 each time
3603                  */
3604                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
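        /*
         * Worked example (assumed mapping): dcb_tc[] = {0, 0, 1, 1, 2, 2, 3, 3}
         * maps priorities 0-1/2-3/4-5/6-7 to TC0/TC1/TC2/TC3 and, at 3 bits
         * per priority, packs into queue_mapping = 0x006D2240.
         */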
3605
3606         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3607
3608         /* RTRPCS: DCB related */
3609         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3610
3611         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3612         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3613         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3614         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3615
3616         /* VFTA - enable all vlan filters */
3617         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3618                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3619         }
3620
3621         /* VFRE: pool enabling for receive - 16 or 32 */
3622         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3623                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3624
3625         /*
3626          * MPSAR - allow pools to read specific mac addresses
3627          * In this case, all pools should be able to read from mac addr 0
3628          */
3629         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3630         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3631
3632         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3633         for (i = 0; i < cfg->nb_pool_maps; i++) {
3634                 /* set vlan id in VF register and set the valid bit */
3635                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3636                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3637                 /*
3638                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3639                  * pools, we only need to use the first half of the register
3640                  * i.e. bits 0-31
3641                  */
3642                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3643         }
3644 }
3645
3646 /**
3647  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3648  * @dev: pointer to eth_dev structure
3649  * @dcb_config: pointer to ixgbe_dcb_config structure
3650  */
3651 static void
3652 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3653                        struct ixgbe_dcb_config *dcb_config)
3654 {
3655         uint32_t reg;
3656         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3657
3658         PMD_INIT_FUNC_TRACE();
3659         if (hw->mac.type != ixgbe_mac_82598EB) {
3660                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3661                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3662                 reg |= IXGBE_RTTDCS_ARBDIS;
3663                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3664
3665                 /* Enable DCB for Tx with 8 or 4 TCs */
3666                 if (dcb_config->num_tcs.pg_tcs == 8) {
3667                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3668                 } else {
3669                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3670                 }
3671                 if (dcb_config->vt_mode)
3672                         reg |= IXGBE_MTQC_VT_ENA;
3673                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3674
3675                 /* Enable the Tx desc arbiter */
3676                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3677                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3678                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3679
3680                 /* Enable Security TX Buffer IFG for DCB */
3681                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3682                 reg |= IXGBE_SECTX_DCB;
3683                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3684         }
3685 }
3686
3687 /**
3688  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3689  * @dev: pointer to rte_eth_dev structure
3690  * @dcb_config: pointer to ixgbe_dcb_config structure
3691  */
3692 static void
3693 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3694                         struct ixgbe_dcb_config *dcb_config)
3695 {
3696         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3697                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3698         struct ixgbe_hw *hw =
3699                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3700
3701         PMD_INIT_FUNC_TRACE();
3702         if (hw->mac.type != ixgbe_mac_82598EB)
3703                 /* PF VF Transmit Enable */
3704                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3705                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3706
3707         /* Configure general DCB TX parameters */
3708         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3709 }
3710
3711 static void
3712 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3713                         struct ixgbe_dcb_config *dcb_config)
3714 {
3715         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3716                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3717         struct ixgbe_dcb_tc_config *tc;
3718         uint8_t i, j;
3719
3720         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3721         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3722                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3723                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3724         } else {
3725                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3726                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3727         }
3728
3729         /* Initialize User Priority to Traffic Class mapping */
3730         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3731                 tc = &dcb_config->tc_config[j];
3732                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3733         }
3734
3735         /* User Priority to Traffic Class mapping */
3736         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3737                 j = vmdq_rx_conf->dcb_tc[i];
3738                 tc = &dcb_config->tc_config[j];
3739                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3740                                                 (uint8_t)(1 << i);
3741         }
3742 }
3743
3744 static void
3745 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3746                         struct ixgbe_dcb_config *dcb_config)
3747 {
3748         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3749                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3750         struct ixgbe_dcb_tc_config *tc;
3751         uint8_t i, j;
3752
3753         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3754         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3755                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3756                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3757         } else {
3758                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3759                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3760         }
3761
3762         /* Initialize User Priority to Traffic Class mapping */
3763         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3764                 tc = &dcb_config->tc_config[j];
3765                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3766         }
3767
3768         /* User Priority to Traffic Class mapping */
3769         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3770                 j = vmdq_tx_conf->dcb_tc[i];
3771                 tc = &dcb_config->tc_config[j];
3772                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3773                                                 (uint8_t)(1 << i);
3774         }
3775 }
3776
3777 static void
3778 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3779                 struct ixgbe_dcb_config *dcb_config)
3780 {
3781         struct rte_eth_dcb_rx_conf *rx_conf =
3782                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3783         struct ixgbe_dcb_tc_config *tc;
3784         uint8_t i, j;
3785
3786         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3787         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3788
3789         /* Initialize User Priority to Traffic Class mapping */
3790         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3791                 tc = &dcb_config->tc_config[j];
3792                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3793         }
3794
3795         /* User Priority to Traffic Class mapping */
3796         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3797                 j = rx_conf->dcb_tc[i];
3798                 tc = &dcb_config->tc_config[j];
3799                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3800                                                 (uint8_t)(1 << i);
3801         }
3802 }
3803
3804 static void
3805 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3806                 struct ixgbe_dcb_config *dcb_config)
3807 {
3808         struct rte_eth_dcb_tx_conf *tx_conf =
3809                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3810         struct ixgbe_dcb_tc_config *tc;
3811         uint8_t i, j;
3812
3813         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3814         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3815
3816         /* Initialize User Priority to Traffic Class mapping */
3817         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3818                 tc = &dcb_config->tc_config[j];
3819                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3820         }
3821
3822         /* User Priority to Traffic Class mapping */
3823         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3824                 j = tx_conf->dcb_tc[i];
3825                 tc = &dcb_config->tc_config[j];
3826                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3827                                                 (uint8_t)(1 << i);
3828         }
3829 }
3830
3831 /**
3832  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3833  * @dev: pointer to eth_dev structure
3834  * @dcb_config: pointer to ixgbe_dcb_config structure
3835  */
3836 static void
3837 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3838                        struct ixgbe_dcb_config *dcb_config)
3839 {
3840         uint32_t reg;
3841         uint32_t vlanctrl;
3842         uint8_t i;
3843         uint32_t q;
3844         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3845
3846         PMD_INIT_FUNC_TRACE();
3847         /*
3848          * Disable the arbiter before changing parameters
3849          * (always enable recycle mode; WSP)
3850          */
3851         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3852         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3853
3854         if (hw->mac.type != ixgbe_mac_82598EB) {
3855                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3856                 if (dcb_config->num_tcs.pg_tcs == 4) {
3857                         if (dcb_config->vt_mode)
3858                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3859                                         IXGBE_MRQC_VMDQRT4TCEN;
3860                         else {
3861                                 /* Whether the mode is DCB or DCB_RSS, set
3862                                  * MRQE to the RTRSSxTCEN value; RSS itself
3863                                  * is controlled by RSS_FIELD.
3864                                  */
3865                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3866                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3867                                         IXGBE_MRQC_RTRSS4TCEN;
3868                         }
3869                 }
3870                 if (dcb_config->num_tcs.pg_tcs == 8) {
3871                         if (dcb_config->vt_mode)
3872                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3873                                         IXGBE_MRQC_VMDQRT8TCEN;
3874                         else {
3875                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3876                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3877                                         IXGBE_MRQC_RTRSS8TCEN;
3878                         }
3879                 }
3880
3881                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3882
3883                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3884                         /* Disable drop for all queues in VMDQ mode*/
3885                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3886                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3887                                                 (IXGBE_QDE_WRITE |
3888                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3889                 } else {
3890                         /* Enable drop for all queues in SRIOV mode */
3891                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3892                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3893                                                 (IXGBE_QDE_WRITE |
3894                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3895                                                  IXGBE_QDE_ENABLE));
3896                 }
3897         }
3898
3899         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3900         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3901         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3902         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3903
3904         /* VFTA - enable all vlan filters */
3905         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3906                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3907         }
3908
3909         /*
3910          * Configure Rx packet plane (recycle mode; WSP) and
3911          * enable arbiter
3912          */
3913         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3914         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3915 }
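/*
 * Illustrative sketch (hypothetical helper, not part of the driver build):
 * shows how the per-queue QDE payload written in the loops above is
 * composed. The queue index is shifted into place, IXGBE_QDE_WRITE latches
 * the update and the drop-enable bit is set only for the SRIOV case.
 */
static inline uint32_t
example_qde_value(uint32_t q, int drop_en)
{
        uint32_t qde = IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT);

        if (drop_en)
                qde |= IXGBE_QDE_ENABLE;
        return qde;
}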
3916
3917 static void
3918 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3919                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3920 {
3921         switch (hw->mac.type) {
3922         case ixgbe_mac_82598EB:
3923                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3924                 break;
3925         case ixgbe_mac_82599EB:
3926         case ixgbe_mac_X540:
3927         case ixgbe_mac_X550:
3928         case ixgbe_mac_X550EM_x:
3929         case ixgbe_mac_X550EM_a:
3930                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3931                                                   tsa, map);
3932                 break;
3933         default:
3934                 break;
3935         }
3936 }
3937
3938 static void
3939 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3940                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3941 {
3942         switch (hw->mac.type) {
3943         case ixgbe_mac_82598EB:
3944                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3945                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3946                 break;
3947         case ixgbe_mac_82599EB:
3948         case ixgbe_mac_X540:
3949         case ixgbe_mac_X550:
3950         case ixgbe_mac_X550EM_x:
3951         case ixgbe_mac_X550EM_a:
3952                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3953                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3954                 break;
3955         default:
3956                 break;
3957         }
3958 }
3959
3960 #define DCB_RX_CONFIG  1
3961 #define DCB_TX_CONFIG  1
3962 #define DCB_TX_PB      1024
3963 /**
3964  * ixgbe_dcb_hw_configure - Enable DCB and configure
3965  * general DCB in VT mode and non-VT mode parameters
3966  * @dev: pointer to rte_eth_dev structure
3967  * @dcb_config: pointer to ixgbe_dcb_config structure
3968  */
3969 static int
3970 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3971                         struct ixgbe_dcb_config *dcb_config)
3972 {
3973         int     ret = 0;
3974         uint8_t i, pfc_en, nb_tcs;
3975         uint16_t pbsize, rx_buffer_size;
3976         uint8_t config_dcb_rx = 0;
3977         uint8_t config_dcb_tx = 0;
3978         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3979         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3980         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3981         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3982         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3983         struct ixgbe_dcb_tc_config *tc;
3984         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
3985                 RTE_ETHER_CRC_LEN;
3986         struct ixgbe_hw *hw =
3987                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3988         struct ixgbe_bw_conf *bw_conf =
3989                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3990
3991         switch (dev->data->dev_conf.rxmode.mq_mode) {
3992         case ETH_MQ_RX_VMDQ_DCB:
3993                 dcb_config->vt_mode = true;
3994                 if (hw->mac.type != ixgbe_mac_82598EB) {
3995                         config_dcb_rx = DCB_RX_CONFIG;
3996                         /*
3997                          *get dcb and VT rx configuration parameters
3998                          *from rte_eth_conf
3999                          */
4000                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4001                         /*Configure general VMDQ and DCB RX parameters*/
4002                         ixgbe_vmdq_dcb_configure(dev);
4003                 }
4004                 break;
4005         case ETH_MQ_RX_DCB:
4006         case ETH_MQ_RX_DCB_RSS:
4007                 dcb_config->vt_mode = false;
4008                 config_dcb_rx = DCB_RX_CONFIG;
4009                 /* Get dcb RX configuration parameters from rte_eth_conf */
4010                 ixgbe_dcb_rx_config(dev, dcb_config);
4011                 /*Configure general DCB RX parameters*/
4012                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4013                 break;
4014         default:
4015                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4016                 break;
4017         }
4018         switch (dev->data->dev_conf.txmode.mq_mode) {
4019         case ETH_MQ_TX_VMDQ_DCB:
4020                 dcb_config->vt_mode = true;
4021                 config_dcb_tx = DCB_TX_CONFIG;
4022                 /* get DCB and VT TX configuration parameters
4023                  * from rte_eth_conf
4024                  */
4025                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4026                 /*Configure general VMDQ and DCB TX parameters*/
4027                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4028                 break;
4029
4030         case ETH_MQ_TX_DCB:
4031                 dcb_config->vt_mode = false;
4032                 config_dcb_tx = DCB_TX_CONFIG;
4033                 /*get DCB TX configuration parameters from rte_eth_conf*/
4034                 ixgbe_dcb_tx_config(dev, dcb_config);
4035                 /*Configure general DCB TX parameters*/
4036                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4037                 break;
4038         default:
4039                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4040                 break;
4041         }
4042
4043         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4044         /* Unpack map */
4045         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4046         if (nb_tcs == ETH_4_TCS) {
4047                 /* Avoid un-configured priority mapping to TC0 */
4048                 /* Avoid mapping unconfigured priorities to TC0 */
4049                 uint8_t mask = 0xFF;
4050
4051                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4052                         mask = (uint8_t)(mask & (~(1 << map[i])));
4053                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4054                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4055                                 map[j++] = i;
4056                         mask >>= 1;
4057                 }
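                /*
                 * Worked example: if the unpacked map starts out as
                 * {0, 0, 1, 1, x, x, x, x}, the mask above ends up 0xFC and
                 * this loop assigns the remaining TCs to the upper
                 * priorities, giving map = {0, 0, 1, 1, 2, 3, 4, 5}, so no
                 * unconfigured priority is left pointing at TC0.
                 */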
4058                 /* Re-configure 4 TCs BW */
4059                 for (i = 0; i < nb_tcs; i++) {
4060                         tc = &dcb_config->tc_config[i];
4061                         if (bw_conf->tc_num != nb_tcs)
4062                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4063                                         (uint8_t)(100 / nb_tcs);
4064                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4065                                                 (uint8_t)(100 / nb_tcs);
4066                 }
4067                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4068                         tc = &dcb_config->tc_config[i];
4069                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4070                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4071                 }
4072         } else {
4073                 /* Re-configure 8 TCs BW */
4074                 for (i = 0; i < nb_tcs; i++) {
4075                         tc = &dcb_config->tc_config[i];
4076                         if (bw_conf->tc_num != nb_tcs)
4077                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4078                                         (uint8_t)(100 / nb_tcs + (i & 1));
4079                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4080                                 (uint8_t)(100 / nb_tcs + (i & 1));
4081                 }
4082         }
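        /*
         * Worked example: with 8 TCs, 100 / 8 = 12 and the (i & 1) term
         * alternates 12/13 across the TCs so the percentages sum to 100;
         * with 4 TCs each TC simply gets 25.
         */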
4083
4084         switch (hw->mac.type) {
4085         case ixgbe_mac_X550:
4086         case ixgbe_mac_X550EM_x:
4087         case ixgbe_mac_X550EM_a:
4088                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4089                 break;
4090         default:
4091                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4092                 break;
4093         }
4094
4095         if (config_dcb_rx) {
4096                 /* Set RX buffer size */
4097                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4098                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4099
4100                 for (i = 0; i < nb_tcs; i++) {
4101                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4102                 }
4103                 /* zero alloc all unused TCs */
4104                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4105                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4106                 }
4107         }
4108         if (config_dcb_tx) {
4109                 /* Only an equally distributed Tx packet buffer
4110                  * strategy is supported.
4111                  */
4112                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4113                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4114
4115                 for (i = 0; i < nb_tcs; i++) {
4116                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4117                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4118                 }
4119                 /* Clear unused TCs, if any, to zero buffer size*/
4120                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4121                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4122                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4123                 }
4124         }
4125
4126         /* Calculate traffic class credits */
4127         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4128                                 IXGBE_DCB_TX_CONFIG);
4129         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4130                                 IXGBE_DCB_RX_CONFIG);
4131
4132         if (config_dcb_rx) {
4133                 /* Unpack CEE standard containers */
4134                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4135                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4136                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4137                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4138                 /* Configure PG(ETS) RX */
4139                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4140         }
4141
4142         if (config_dcb_tx) {
4143                 /* Unpack CEE standard containers */
4144                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4145                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4146                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4147                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4148                 /* Configure PG(ETS) TX */
4149                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4150         }
4151
4152         /*Configure queue statistics registers*/
4153         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4154
4155         /* Check if the PFC is supported */
4156         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4157                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4158                 for (i = 0; i < nb_tcs; i++) {
4159                         /*
4160                          * E.g. with 8 TCs the default high_water is 48
4161                          * and the default low_water is 16.
4162                          */
4163                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4164                         hw->fc.low_water[i] = pbsize / 4;
4165                         /* Enable pfc for this TC */
4166                         tc = &dcb_config->tc_config[i];
4167                         tc->pfc = ixgbe_dcb_pfc_enabled;
4168                 }
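                /*
                 * Worked example: each TC gets pbsize = rx_buffer_size /
                 * nb_tcs, and the flow control watermarks then sit at 3/4
                 * of that (high_water) and 1/4 of it (low_water).
                 */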
4169                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4170                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4171                         pfc_en &= 0x0F;
4172                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4173         }
4174
4175         return ret;
4176 }
4177
4178 /**
4179  * ixgbe_configure_dcb - Configure DCB  Hardware
4180  * @dev: pointer to rte_eth_dev
4181  */
4182 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4183 {
4184         struct ixgbe_dcb_config *dcb_cfg =
4185                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4186         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4187
4188         PMD_INIT_FUNC_TRACE();
4189
4190         /* check that the mq_mode supports DCB */
4191         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4192             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4193             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4194                 return;
4195
4196         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4197                 return;
4198
4199         /** Configure DCB hardware **/
4200         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4201 }
4202
4203 /*
4204  * VMDq is only supported on 10 GbE NICs.
4205  */
4206 static void
4207 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4208 {
4209         struct rte_eth_vmdq_rx_conf *cfg;
4210         struct ixgbe_hw *hw;
4211         enum rte_eth_nb_pools num_pools;
4212         uint32_t mrqc, vt_ctl, vlanctrl;
4213         uint32_t vmolr = 0;
4214         int i;
4215
4216         PMD_INIT_FUNC_TRACE();
4217         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4218         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4219         num_pools = cfg->nb_queue_pools;
4220
4221         ixgbe_rss_disable(dev);
4222
4223         /* MRQC: enable vmdq */
4224         mrqc = IXGBE_MRQC_VMDQEN;
4225         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4226
4227         /* PFVTCTL: turn on virtualisation and set the default pool */
4228         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4229         if (cfg->enable_default_pool)
4230                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4231         else
4232                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4233
4234         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4235
4236         for (i = 0; i < (int)num_pools; i++) {
4237                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4238                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4239         }
4240
4241         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4242         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4243         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4244         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4245
4246         /* VFTA - enable all vlan filters */
4247         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4248                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4249
4250         /* VFRE: pool enabling for receive - 64 */
4251         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4252         if (num_pools == ETH_64_POOLS)
4253                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4254
4255         /*
4256          * MPSAR - allow pools to read specific mac addresses
4257          * In this case, all pools should be able to read from mac addr 0
4258          */
4259         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4260         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4261
4262         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4263         for (i = 0; i < cfg->nb_pool_maps; i++) {
4264                 /* set vlan id in VF register and set the valid bit */
4265                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4266                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4267                 /*
4268                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4269                  * pools, we only need to use the first half of the register
4270                  * i.e. bits 0-31
4271                  */
4272                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4273                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4274                                         (cfg->pool_map[i].pools & UINT32_MAX));
4275                 else
4276                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4277                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4278
4279         }
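        /*
         * Worked example: with pool_map[i].pools = 0x5 (pools 0 and 2) the
         * upper 32 bits are zero, so VLVFB(i * 2) is written with 0x5; a
         * map that only enables pool 33 (pools = 1ULL << 33) lands in
         * VLVFB(i * 2 + 1) as 0x2 instead.
         */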
4280
4281         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4282         if (cfg->enable_loop_back) {
4283                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4284                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4285                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4286         }
4287
4288         IXGBE_WRITE_FLUSH(hw);
4289 }
4290
4291 /*
4292  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4293  * @hw: pointer to hardware structure
4294  */
4295 static void
4296 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4297 {
4298         uint32_t reg;
4299         uint32_t q;
4300
4301         PMD_INIT_FUNC_TRACE();
4302         /* PF/VF Transmit Enable */
4303         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4304         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4305
4306         /* Disable the Tx desc arbiter so that MTQC can be changed */
4307         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4308         reg |= IXGBE_RTTDCS_ARBDIS;
4309         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4310
4311         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4312         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4313
4314         /* Disable drop for all queues */
4315         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4316                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4317                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4318
4319         /* Enable the Tx desc arbiter */
4320         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4321         reg &= ~IXGBE_RTTDCS_ARBDIS;
4322         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4323
4324         IXGBE_WRITE_FLUSH(hw);
4325 }
4326
4327 static int __attribute__((cold))
4328 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4329 {
4330         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4331         uint64_t dma_addr;
4332         unsigned int i;
4333
4334         /* Initialize software ring entries */
4335         for (i = 0; i < rxq->nb_rx_desc; i++) {
4336                 volatile union ixgbe_adv_rx_desc *rxd;
4337                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4338
4339                 if (mbuf == NULL) {
4340                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4341                                      (unsigned) rxq->queue_id);
4342                         return -ENOMEM;
4343                 }
4344
4345                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4346                 mbuf->port = rxq->port_id;
4347
4348                 dma_addr =
4349                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4350                 rxd = &rxq->rx_ring[i];
4351                 rxd->read.hdr_addr = 0;
4352                 rxd->read.pkt_addr = dma_addr;
4353                 rxe[i].mbuf = mbuf;
4354         }
4355
4356         return 0;
4357 }
4358
4359 static int
4360 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4361 {
4362         struct ixgbe_hw *hw;
4363         uint32_t mrqc;
4364
4365         ixgbe_rss_configure(dev);
4366
4367         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4368
4369         /* MRQC: enable VF RSS */
4370         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4371         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4372         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4373         case ETH_64_POOLS:
4374                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4375                 break;
4376
4377         case ETH_32_POOLS:
4378                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4379                 break;
4380
4381         default:
4382                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4383                 return -EINVAL;
4384         }
4385
4386         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4387
4388         return 0;
4389 }
4390
4391 static int
4392 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4393 {
4394         struct ixgbe_hw *hw =
4395                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4396
4397         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4398         case ETH_64_POOLS:
4399                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4400                         IXGBE_MRQC_VMDQEN);
4401                 break;
4402
4403         case ETH_32_POOLS:
4404                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4405                         IXGBE_MRQC_VMDQRT4TCEN);
4406                 break;
4407
4408         case ETH_16_POOLS:
4409                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4410                         IXGBE_MRQC_VMDQRT8TCEN);
4411                 break;
4412         default:
4413                 PMD_INIT_LOG(ERR,
4414                         "invalid pool number in IOV mode");
4415                 break;
4416         }
4417         return 0;
4418 }
4419
4420 static int
4421 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4422 {
4423         struct ixgbe_hw *hw =
4424                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4425
4426         if (hw->mac.type == ixgbe_mac_82598EB)
4427                 return 0;
4428
4429         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4430                 /*
4431                  * SRIOV inactive scheme
4432                  * any DCB/RSS w/o VMDq multi-queue setting
4433                  */
4434                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4435                 case ETH_MQ_RX_RSS:
4436                 case ETH_MQ_RX_DCB_RSS:
4437                 case ETH_MQ_RX_VMDQ_RSS:
4438                         ixgbe_rss_configure(dev);
4439                         break;
4440
4441                 case ETH_MQ_RX_VMDQ_DCB:
4442                         ixgbe_vmdq_dcb_configure(dev);
4443                         break;
4444
4445                 case ETH_MQ_RX_VMDQ_ONLY:
4446                         ixgbe_vmdq_rx_hw_configure(dev);
4447                         break;
4448
4449                 case ETH_MQ_RX_NONE:
4450                 default:
4451                         /* if mq_mode is none, disable RSS mode. */
4452                         ixgbe_rss_disable(dev);
4453                         break;
4454                 }
4455         } else {
4456                 /* SRIOV active scheme
4457                  * Support RSS together with SRIOV.
4458                  */
4459                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4460                 case ETH_MQ_RX_RSS:
4461                 case ETH_MQ_RX_VMDQ_RSS:
4462                         ixgbe_config_vf_rss(dev);
4463                         break;
4464                 case ETH_MQ_RX_VMDQ_DCB:
4465                 case ETH_MQ_RX_DCB:
4466                 /* In SRIOV, the configuration is the same as the VMDq case */
4467                         ixgbe_vmdq_dcb_configure(dev);
4468                         break;
4469                 /* DCB/RSS together with SRIOV is not supported */
4470                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4471                 case ETH_MQ_RX_DCB_RSS:
4472                         PMD_INIT_LOG(ERR,
4473                                 "Could not support DCB/RSS with VMDq & SRIOV");
4474                         return -1;
4475                 default:
4476                         ixgbe_config_vf_default(dev);
4477                         break;
4478                 }
4479         }
4480
4481         return 0;
4482 }
4483
4484 static int
4485 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4486 {
4487         struct ixgbe_hw *hw =
4488                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4489         uint32_t mtqc;
4490         uint32_t rttdcs;
4491
4492         if (hw->mac.type == ixgbe_mac_82598EB)
4493                 return 0;
4494
4495         /* disable arbiter before setting MTQC */
4496         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4497         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4498         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4499
4500         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4501                 /*
4502                  * SRIOV inactive scheme
4503                  * any DCB w/o VMDq multi-queue setting
4504                  */
4505                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4506                         ixgbe_vmdq_tx_hw_configure(hw);
4507                 else {
4508                         mtqc = IXGBE_MTQC_64Q_1PB;
4509                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4510                 }
4511         } else {
4512                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4513
4514                 /*
4515                  * SRIOV active scheme
4516                  * FIXME: add support for DCB together with VMDq & SRIOV
4517                  */
4518                 case ETH_64_POOLS:
4519                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4520                         break;
4521                 case ETH_32_POOLS:
4522                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4523                         break;
4524                 case ETH_16_POOLS:
4525                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4526                                 IXGBE_MTQC_8TC_8TQ;
4527                         break;
4528                 default:
4529                         mtqc = IXGBE_MTQC_64Q_1PB;
4530                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4531                 }
4532                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4533         }
4534
4535         /* re-enable arbiter */
4536         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4537         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4538
4539         return 0;
4540 }
4541
4542 /**
4543  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4544  *
4545  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4546  * spec rev. 3.0 chapter 8.2.3.8.13.
4547  *
4548  * @pool Memory pool of the Rx queue
4549  */
4550 static inline uint32_t
4551 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4552 {
4553         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4554
4555         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4556         uint16_t maxdesc =
4557                 RTE_IPV4_MAX_PKT_LEN /
4558                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4559
4560         if (maxdesc >= 16)
4561                 return IXGBE_RSCCTL_MAXDESC_16;
4562         else if (maxdesc >= 8)
4563                 return IXGBE_RSCCTL_MAXDESC_8;
4564         else if (maxdesc >= 4)
4565                 return IXGBE_RSCCTL_MAXDESC_4;
4566         else
4567                 return IXGBE_RSCCTL_MAXDESC_1;
4568 }
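/*
 * Worked example (assuming a typical mbuf pool): with a data room of
 * 2176 bytes (2048 + RTE_PKTMBUF_HEADROOM), the per-descriptor payload is
 * 2048 bytes, so RTE_IPV4_MAX_PKT_LEN / 2048 = 31 and the function above
 * returns IXGBE_RSCCTL_MAXDESC_16.
 */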
4569
4570 /**
4571  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4572  * interrupt
4573  *
4574  * (Taken from FreeBSD tree)
4575  * (yes this is all very magic and confusing :)
4576  *
4577  * @dev port handle
4578  * @entry the register array entry
4579  * @vector the MSIX vector for this queue
4580  * @type RX/TX/MISC
4581  */
4582 static void
4583 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4584 {
4585         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4586         u32 ivar, index;
4587
4588         vector |= IXGBE_IVAR_ALLOC_VAL;
4589
4590         switch (hw->mac.type) {
4591
4592         case ixgbe_mac_82598EB:
4593                 if (type == -1)
4594                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4595                 else
4596                         entry += (type * 64);
4597                 index = (entry >> 2) & 0x1F;
4598                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4599                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4600                 ivar |= (vector << (8 * (entry & 0x3)));
4601                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4602                 break;
4603
4604         case ixgbe_mac_82599EB:
4605         case ixgbe_mac_X540:
4606                 if (type == -1) { /* MISC IVAR */
4607                         index = (entry & 1) * 8;
4608                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4609                         ivar &= ~(0xFF << index);
4610                         ivar |= (vector << index);
4611                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4612                 } else {        /* RX/TX IVARS */
4613                         index = (16 * (entry & 1)) + (8 * type);
4614                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4615                         ivar &= ~(0xFF << index);
4616                         ivar |= (vector << index);
4617                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4618                 }
4619
4620                 break;
4621
4622         default:
4623                 break;
4624         }
4625 }
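/*
 * Worked example (82599/X540 Rx case): for entry = 3 and type = 0,
 * index = 16 * (3 & 1) + 8 * 0 = 16, so the vector (with
 * IXGBE_IVAR_ALLOC_VAL set) is written into bits 23:16 of
 * IXGBE_IVAR(3 >> 1) = IXGBE_IVAR(1).
 */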
4626
4627 void __attribute__((cold))
4628 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4629 {
4630         uint16_t i, rx_using_sse;
4631         struct ixgbe_adapter *adapter =
4632                 (struct ixgbe_adapter *)dev->data->dev_private;
4633
4634         /*
4635          * In order to allow Vector Rx there are a few configuration
4636          * conditions to be met and Rx Bulk Allocation should be allowed.
4637          */
4638         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4639             !adapter->rx_bulk_alloc_allowed) {
4640                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4641                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4642                                     "not enabled",
4643                              dev->data->port_id);
4644
4645                 adapter->rx_vec_allowed = false;
4646         }
4647
4648         /*
4649          * Initialize the appropriate LRO callback.
4650          *
4651          * If all queues satisfy the bulk allocation preconditions
4652          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4653          * Otherwise use a single allocation version.
4654          */
4655         if (dev->data->lro) {
4656                 if (adapter->rx_bulk_alloc_allowed) {
4657                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4658                                            "allocation version");
4659                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4660                 } else {
4661                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4662                                            "allocation version");
4663                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4664                 }
4665         } else if (dev->data->scattered_rx) {
4666                 /*
4667                  * Set the non-LRO scattered callback: there are Vector and
4668                  * single allocation versions.
4669                  */
4670                 if (adapter->rx_vec_allowed) {
4671                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4672                                             "callback (port=%d).",
4673                                      dev->data->port_id);
4674
4675                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4676                 } else if (adapter->rx_bulk_alloc_allowed) {
4677                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback "
4678                                            "with bulk allocation (port=%d).",
4679                                      dev->data->port_id);
4680                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4681                 } else {
4682                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4683                                             "single allocation) "
4684                                             "Scattered Rx callback "
4685                                             "(port=%d).",
4686                                      dev->data->port_id);
4687
4688                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4689                 }
4690         /*
4691          * Below we set "simple" callbacks according to port/queues parameters.
4692          * If parameters allow we are going to choose between the following
4693          * callbacks:
4694          *    - Vector
4695          *    - Bulk Allocation
4696          *    - Single buffer allocation (the simplest one)
4697          */
4698         } else if (adapter->rx_vec_allowed) {
4699                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4700                                     "burst size is no less than %d (port=%d).",
4701                              RTE_IXGBE_DESCS_PER_LOOP,
4702                              dev->data->port_id);
4703
4704                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4705         } else if (adapter->rx_bulk_alloc_allowed) {
4706                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4707                                     "satisfied. Rx Burst Bulk Alloc function "
4708                                     "will be used on port=%d.",
4709                              dev->data->port_id);
4710
4711                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4712         } else {
4713                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4714                                     "satisfied, or Scattered Rx is requested "
4715                                     "(port=%d).",
4716                              dev->data->port_id);
4717
4718                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4719         }
4720
4721         /* Propagate information about RX function choice through all queues. */
4722
4723         rx_using_sse =
4724                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4725                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4726
4727         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4728                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4729
4730                 rxq->rx_using_sse = rx_using_sse;
4731 #ifdef RTE_LIBRTE_SECURITY
4732                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4733                                 DEV_RX_OFFLOAD_SECURITY);
4734 #endif
4735         }
4736 }
4737
4738 /**
4739  * ixgbe_set_rsc - configure RSC related port HW registers
4740  *
4741  * Configures the port's RSC related registers according to chapter 4.6.7.2
4742  * of the 82599 Spec (x540 configuration is virtually the same).
4743  *
4744  * @dev port handle
4745  *
4746  * Returns 0 in case of success or a non-zero error code
4747  */
4748 static int
4749 ixgbe_set_rsc(struct rte_eth_dev *dev)
4750 {
4751         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4752         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4753         struct rte_eth_dev_info dev_info = { 0 };
4754         bool rsc_capable = false;
4755         uint16_t i;
4756         uint32_t rdrxctl;
4757         uint32_t rfctl;
4758
4759         /* Sanity check */
4760         dev->dev_ops->dev_infos_get(dev, &dev_info);
4761         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4762                 rsc_capable = true;
4763
4764         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4765                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4766                                    "support it");
4767                 return -EINVAL;
4768         }
4769
4770         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4771
4772         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4773              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4774                 /*
4775                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4776                  * 3.0 RSC configuration requires HW CRC stripping being
4777                  * enabled. If user requested both HW CRC stripping off
4778                  * and RSC on - return an error.
4779                  */
4780                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4781                                     "is disabled");
4782                 return -EINVAL;
4783         }
4784
4785         /* RFCTL configuration  */
4786         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4787         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4788                 /*
4789                  * Since NFS packet coalescing is not supported, clear
4790                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4791                  * enabled.
4792                  */
4793                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4794                            IXGBE_RFCTL_NFSR_DIS);
4795         else
4796                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4797         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4798
4799         /* If LRO hasn't been requested - we are done here. */
4800         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4801                 return 0;
4802
4803         /* Set RDRXCTL.RSCACKC bit */
4804         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4805         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4806         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4807
4808         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4809         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4810                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4811                 uint32_t srrctl =
4812                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4813                 uint32_t rscctl =
4814                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4815                 uint32_t psrtype =
4816                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4817                 uint32_t eitr =
4818                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4819
4820                 /*
4821                  * ixgbe PMD doesn't support header-split at the moment.
4822                  *
4823                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4824                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4825                  * should be configured even if header split is not
4826                  * enabled. We will configure it to 128 bytes following the
4827                  * recommendation in the spec.
4828                  */
4829                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4830                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4831                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4832
4833                 /*
4834                  * TODO: Consider setting the Receive Descriptor Minimum
4835                  * Threshold Size for an RSC case. This is not an obviously
4836                  * beneficial option but one worth considering...
4837                  */
4838
4839                 rscctl |= IXGBE_RSCCTL_RSCEN;
4840                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4841                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4842
4843                 /*
4844                  * RSC: Set ITR interval corresponding to 2K ints/s.
4845                  *
4846                  * Full-sized RSC aggregations for a 10Gb/s link will
4847                  * arrive at about 20K aggregation/s rate.
4848                  *
4849                  * A 2K ints/s rate will cause only about 10% of the
4850                  * aggregations to be closed due to interrupt timer
4851                  * expiration when streaming at wire speed.
4852                  *
4853                  * For a sparse streaming case this setting will yield
4854                  * at most 500us latency for a single RSC aggregation.
4855                  */
4856                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4857                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4858                 eitr |= IXGBE_EITR_CNT_WDIS;
4859
4860                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4861                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4862                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4863                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4864
4865                 /*
4866                  * RSC requires the mapping of the queue to the
4867                  * interrupt vector.
4868                  */
4869                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4870         }
4871
4872         dev->data->lro = 1;
4873
4874         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4875
4876         return 0;
4877 }
4878
4879 /*
4880  * Initializes Receive Unit.
4881  */
4882 int __attribute__((cold))
4883 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4884 {
4885         struct ixgbe_hw     *hw;
4886         struct ixgbe_rx_queue *rxq;
4887         uint64_t bus_addr;
4888         uint32_t rxctrl;
4889         uint32_t fctrl;
4890         uint32_t hlreg0;
4891         uint32_t maxfrs;
4892         uint32_t srrctl;
4893         uint32_t rdrxctl;
4894         uint32_t rxcsum;
4895         uint16_t buf_size;
4896         uint16_t i;
4897         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4898         int rc;
4899
4900         PMD_INIT_FUNC_TRACE();
4901         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4902
4903         /*
4904          * Make sure receives are disabled while setting
4905          * up the RX context (registers, descriptor rings, etc.).
4906          */
4907         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4908         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4909
4910         /* Enable receipt of broadcast frames */
4911         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4912         fctrl |= IXGBE_FCTRL_BAM;
4913         fctrl |= IXGBE_FCTRL_DPF;
4914         fctrl |= IXGBE_FCTRL_PMCF;
4915         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4916
4917         /*
4918          * Configure CRC stripping, if any.
4919          */
4920         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4921         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4922                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4923         else
4924                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4925
4926         /*
4927          * Configure jumbo frame support, if any.
4928          */
4929         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
4930                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4931                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4932                 maxfrs &= 0x0000FFFF;
4933                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4934                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4935         } else
4936                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4937
4938         /*
4939          * If loopback mode is configured, set LPBK bit.
4940          */
4941         if (dev->data->dev_conf.lpbk_mode != 0) {
4942                 rc = ixgbe_check_supported_loopback_mode(dev);
4943                 if (rc < 0) {
4944                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
4945                         return rc;
4946                 }
4947                 hlreg0 |= IXGBE_HLREG0_LPBK;
4948         } else {
4949                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4950         }
4951
4952         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4953
4954         /*
4955          * Assume no header split and no VLAN strip support
4956          * on any Rx queue first .
4957          */
4958         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
4959         /* Setup RX queues */
4960         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4961                 rxq = dev->data->rx_queues[i];
4962
4963                 /*
4964                  * Reset crc_len in case it was changed after queue setup by a
4965                  * call to configure.
4966                  */
4967                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4968                         rxq->crc_len = RTE_ETHER_CRC_LEN;
4969                 else
4970                         rxq->crc_len = 0;
4971
4972                 /* Setup the Base and Length of the Rx Descriptor Rings */
4973                 bus_addr = rxq->rx_ring_phys_addr;
4974                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4975                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4976                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4977                                 (uint32_t)(bus_addr >> 32));
4978                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4979                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4980                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4981                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4982
4983                 /* Configure the SRRCTL register */
4984                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4985
4986                 /* Set if packets are dropped when no descriptors available */
4987                 if (rxq->drop_en)
4988                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4989
4990                 /*
4991                  * Configure the RX buffer size in the BSIZEPACKET field of
4992                  * the SRRCTL register of the queue.
4993                  * The value is in 1 KB resolution. Valid values can be from
4994                  * 1 KB to 16 KB.
4995                  */
4996                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4997                         RTE_PKTMBUF_HEADROOM);
4998                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4999                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5000
5001                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5002
5003                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5004                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5005
5006                 /* Add the length of two VLAN tags to account for double VLAN frames */
5007                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5008                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5009                         dev->data->scattered_rx = 1;
5010                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5011                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5012         }
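        /*
         * Worked example (assuming a typical 2176-byte data room): the
         * usable buffer is 2048 bytes, which BSIZEPKT encodes as 2 KB; a
         * max_rx_pkt_len of 9000 plus two VLAN tags exceeds that, so
         * scattered Rx is enabled for such a jumbo frame setup.
         */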
5013
5014         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5015                 dev->data->scattered_rx = 1;
5016
5017         /*
5018          * Device configured with multiple RX queues.
5019          */
5020         ixgbe_dev_mq_rx_configure(dev);
5021
5022         /*
5023          * Setup the Checksum Register.
5024          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5025          * Enable IP/L4 checksum computation by hardware if requested to do so.
5026          */
5027         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5028         rxcsum |= IXGBE_RXCSUM_PCSD;
5029         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5030                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5031         else
5032                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5033
5034         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5035
5036         if (hw->mac.type == ixgbe_mac_82599EB ||
5037             hw->mac.type == ixgbe_mac_X540) {
5038                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5039                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5040                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5041                 else
5042                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5043                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5044                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5045         }
5046
5047         rc = ixgbe_set_rsc(dev);
5048         if (rc)
5049                 return rc;
5050
5051         ixgbe_set_rx_function(dev);
5052
5053         return 0;
5054 }
5055
5056 /*
5057  * Initializes Transmit Unit.
5058  */
5059 void __attribute__((cold))
5060 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5061 {
5062         struct ixgbe_hw     *hw;
5063         struct ixgbe_tx_queue *txq;
5064         uint64_t bus_addr;
5065         uint32_t hlreg0;
5066         uint32_t txctrl;
5067         uint16_t i;
5068
5069         PMD_INIT_FUNC_TRACE();
5070         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5071
5072         /* Enable TX CRC (checksum offload requirement) and hw padding
5073          * (TSO requirement)
5074          */
5075         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5076         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5077         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5078
5079         /* Setup the Base and Length of the Tx Descriptor Rings */
5080         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5081                 txq = dev->data->tx_queues[i];
5082
5083                 bus_addr = txq->tx_ring_phys_addr;
5084                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5085                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5086                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5087                                 (uint32_t)(bus_addr >> 32));
5088                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5089                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5090                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5091                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5092                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5093
5094                 /*
5095                  * Disable Tx Head Writeback RO bit, since this hoses
5096                  * bookkeeping if things aren't delivered in order.
5097                  */
5098                 switch (hw->mac.type) {
5099                 case ixgbe_mac_82598EB:
5100                         txctrl = IXGBE_READ_REG(hw,
5101                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5102                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5103                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5104                                         txctrl);
5105                         break;
5106
5107                 case ixgbe_mac_82599EB:
5108                 case ixgbe_mac_X540:
5109                 case ixgbe_mac_X550:
5110                 case ixgbe_mac_X550EM_x:
5111                 case ixgbe_mac_X550EM_a:
5112                 default:
5113                         txctrl = IXGBE_READ_REG(hw,
5114                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5115                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5116                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5117                                         txctrl);
5118                         break;
5119                 }
5120         }
5121
5122         /* Configure the Tx multi-queue mode for the device. */
5123         ixgbe_dev_mq_tx_configure(dev);
5124 }
5125
5126 /*
5127  * Check if requested loopback mode is supported
5128  */
5129 int
5130 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5131 {
5132         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5133
5134         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5135                 if (hw->mac.type == ixgbe_mac_82599EB ||
5136                      hw->mac.type == ixgbe_mac_X540 ||
5137                      hw->mac.type == ixgbe_mac_X550 ||
5138                      hw->mac.type == ixgbe_mac_X550EM_x ||
5139                      hw->mac.type == ixgbe_mac_X550EM_a)
5140                         return 0;
5141
5142         return -ENOTSUP;
5143 }
5144
5145 /*
5146  * Set up link for 82599 loopback mode Tx->Rx.
5147  */
5148 static inline void __attribute__((cold))
5149 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5150 {
5151         PMD_INIT_FUNC_TRACE();
5152
5153         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5154                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5155                                 IXGBE_SUCCESS) {
5156                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5157                         /* ignore error */
5158                         return;
5159                 }
5160         }
5161
5162         /* Restart link */
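        /*
         * FLU forces link up and LMS_10G_LINK_NO_AN selects 10G link mode
         * with auto-negotiation disabled, so the loopback link can come up
         * without a link partner.
         */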
5163         IXGBE_WRITE_REG(hw,
5164                         IXGBE_AUTOC,
5165                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5166         ixgbe_reset_pipeline_82599(hw);
5167
5168         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5169         msec_delay(50);
5170 }
5171
5172
5173 /*
5174  * Start Transmit and Receive Units.
5175  */
5176 int __attribute__((cold))
5177 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5178 {
5179         struct ixgbe_hw     *hw;
5180         struct ixgbe_tx_queue *txq;
5181         struct ixgbe_rx_queue *rxq;
5182         uint32_t txdctl;
5183         uint32_t dmatxctl;
5184         uint32_t rxctrl;
5185         uint16_t i;
5186         int ret = 0;
5187
5188         PMD_INIT_FUNC_TRACE();
5189         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5190
5191         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5192                 txq = dev->data->tx_queues[i];
5193                 /* Setup Transmit Threshold Registers */
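                /*
                 * TXDCTL layout, as implied by the shifts below: PTHRESH in
                 * bits 6:0, HTHRESH in bits 14:8, WTHRESH in bits 22:16.
                 */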
5194                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5195                 txdctl |= txq->pthresh & 0x7F;
5196                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5197                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5198                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5199         }
5200
5201         if (hw->mac.type != ixgbe_mac_82598EB) {
5202                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5203                 dmatxctl |= IXGBE_DMATXCTL_TE;
5204                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5205         }
5206
5207         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5208                 txq = dev->data->tx_queues[i];
5209                 if (!txq->tx_deferred_start) {
5210                         ret = ixgbe_dev_tx_queue_start(dev, i);
5211                         if (ret < 0)
5212                                 return ret;
5213                 }
5214         }
5215
5216         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5217                 rxq = dev->data->rx_queues[i];
5218                 if (!rxq->rx_deferred_start) {
5219                         ret = ixgbe_dev_rx_queue_start(dev, i);
5220                         if (ret < 0)
5221                                 return ret;
5222                 }
5223         }
5224
5225         /* Enable Receive engine */
5226         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5227         if (hw->mac.type == ixgbe_mac_82598EB)
5228                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5229         rxctrl |= IXGBE_RXCTRL_RXEN;
5230         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5231
5232         /* If loopback mode is enabled, set up the link accordingly */
5233         if (dev->data->dev_conf.lpbk_mode != 0) {
5234                 if (hw->mac.type == ixgbe_mac_82599EB)
5235                         ixgbe_setup_loopback_link_82599(hw);
5236                 else if (hw->mac.type == ixgbe_mac_X540 ||
5237                      hw->mac.type == ixgbe_mac_X550 ||
5238                      hw->mac.type == ixgbe_mac_X550EM_x ||
5239                      hw->mac.type == ixgbe_mac_X550EM_a)
5240                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5241         }
5242
5243 #ifdef RTE_LIBRTE_SECURITY
5244         if ((dev->data->dev_conf.rxmode.offloads &
5245                         DEV_RX_OFFLOAD_SECURITY) ||
5246                 (dev->data->dev_conf.txmode.offloads &
5247                         DEV_TX_OFFLOAD_SECURITY)) {
5248                 ret = ixgbe_crypto_enable_ipsec(dev);
5249                 if (ret != 0) {
5250                         PMD_DRV_LOG(ERR,
5251                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5252                                     ret);
5253                         return ret;
5254                 }
5255         }
5256 #endif
5257
5258         return 0;
5259 }
5260
5261 /*
5262  * Start Receive Units for specified queue.
5263  */
5264 int __attribute__((cold))
5265 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5266 {
5267         struct ixgbe_hw     *hw;
5268         struct ixgbe_rx_queue *rxq;
5269         uint32_t rxdctl;
5270         int poll_ms;
5271
5272         PMD_INIT_FUNC_TRACE();
5273         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5274
5275         rxq = dev->data->rx_queues[rx_queue_id];
5276
5277         /* Allocate buffers for descriptor rings */
5278         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5279                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5280                              rx_queue_id);
5281                 return -1;
5282         }
5283         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5284         rxdctl |= IXGBE_RXDCTL_ENABLE;
5285         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5286
5287         /* Wait until RX Enable ready */
5288         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5289         do {
5290                 rte_delay_ms(1);
5291                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5292         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5293         if (!poll_ms)
5294                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5295         rte_wmb();
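        /*
         * Head at 0 and tail at the last descriptor hands the full ring to
         * the hardware while keeping the usual one-descriptor head/tail gap.
         */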
5296         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5297         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5298         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5299
5300         return 0;
5301 }
5302
5303 /*
5304  * Stop Receive Units for specified queue.
5305  */
5306 int __attribute__((cold))
5307 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5308 {
5309         struct ixgbe_hw     *hw;
5310         struct ixgbe_adapter *adapter =
5311                 (struct ixgbe_adapter *)dev->data->dev_private;
5312         struct ixgbe_rx_queue *rxq;
5313         uint32_t rxdctl;
5314         int poll_ms;
5315
5316         PMD_INIT_FUNC_TRACE();
5317         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5318
5319         rxq = dev->data->rx_queues[rx_queue_id];
5320
5321         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5322         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5323         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5324
5325         /* Wait until RX Enable bit clear */
5326         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5327         do {
5328                 rte_delay_ms(1);
5329                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5330         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5331         if (!poll_ms)
5332                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5333
5334         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5335
5336         ixgbe_rx_queue_release_mbufs(rxq);
5337         ixgbe_reset_rx_queue(adapter, rxq);
5338         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5339
5340         return 0;
5341 }
5342
5343
5344 /*
5345  * Start Transmit Units for specified queue.
5346  */
5347 int __attribute__((cold))
5348 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5349 {
5350         struct ixgbe_hw     *hw;
5351         struct ixgbe_tx_queue *txq;
5352         uint32_t txdctl;
5353         int poll_ms;
5354
5355         PMD_INIT_FUNC_TRACE();
5356         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5357
5358         txq = dev->data->tx_queues[tx_queue_id];
5359         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5360         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5361         txdctl |= IXGBE_TXDCTL_ENABLE;
5362         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5363
5364         /* Wait until TX Enable ready */
5365         if (hw->mac.type == ixgbe_mac_82599EB) {
5366                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5367                 do {
5368                         rte_delay_ms(1);
5369                         txdctl = IXGBE_READ_REG(hw,
5370                                 IXGBE_TXDCTL(txq->reg_idx));
5371                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5372                 if (!poll_ms)
5373                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5374                                 tx_queue_id);
5375         }
5376         rte_wmb();
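        /*
         * With TDH and TDT both at 0 the queue starts out empty; the PMD
         * advances TDT as it posts transmit descriptors.
         */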
5377         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5378         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5379
5380         return 0;
5381 }
5382
5383 /*
5384  * Stop Transmit Units for specified queue.
5385  */
5386 int __attribute__((cold))
5387 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5388 {
5389         struct ixgbe_hw     *hw;
5390         struct ixgbe_tx_queue *txq;
5391         uint32_t txdctl;
5392         uint32_t txtdh, txtdt;
5393         int poll_ms;
5394
5395         PMD_INIT_FUNC_TRACE();
5396         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5397
5398         txq = dev->data->tx_queues[tx_queue_id];
5399
5400         /* Wait until TX queue is empty */
5401         if (hw->mac.type == ixgbe_mac_82599EB) {
5402                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5403                 do {
5404                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5405                         txtdh = IXGBE_READ_REG(hw,
5406                                                IXGBE_TDH(txq->reg_idx));
5407                         txtdt = IXGBE_READ_REG(hw,
5408                                                IXGBE_TDT(txq->reg_idx));
5409                 } while (--poll_ms && (txtdh != txtdt));
5410                 if (!poll_ms)
5411                         PMD_INIT_LOG(ERR,
5412                                 "Tx Queue %d is not empty when stopping.",
5413                                 tx_queue_id);
5414         }
5415
5416         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5417         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5418         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5419
5420         /* Wait until TX Enable bit clear */
5421         if (hw->mac.type == ixgbe_mac_82599EB) {
5422                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5423                 do {
5424                         rte_delay_ms(1);
5425                         txdctl = IXGBE_READ_REG(hw,
5426                                                 IXGBE_TXDCTL(txq->reg_idx));
5427                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5428                 if (!poll_ms)
5429                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5430                                 tx_queue_id);
5431         }
5432
5433         if (txq->ops != NULL) {
5434                 txq->ops->release_mbufs(txq);
5435                 txq->ops->reset(txq);
5436         }
5437         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5438
5439         return 0;
5440 }
5441
5442 void
5443 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5444         struct rte_eth_rxq_info *qinfo)
5445 {
5446         struct ixgbe_rx_queue *rxq;
5447
5448         rxq = dev->data->rx_queues[queue_id];
5449
5450         qinfo->mp = rxq->mb_pool;
5451         qinfo->scattered_rx = dev->data->scattered_rx;
5452         qinfo->nb_desc = rxq->nb_rx_desc;
5453
5454         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5455         qinfo->conf.rx_drop_en = rxq->drop_en;
5456         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5457         qinfo->conf.offloads = rxq->offloads;
5458 }
5459
5460 void
5461 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5462         struct rte_eth_txq_info *qinfo)
5463 {
5464         struct ixgbe_tx_queue *txq;
5465
5466         txq = dev->data->tx_queues[queue_id];
5467
5468         qinfo->nb_desc = txq->nb_tx_desc;
5469
5470         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5471         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5472         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5473
5474         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5475         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5476         qinfo->conf.offloads = txq->offloads;
5477         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5478 }
5479
5480 /*
5481  * [VF] Initializes Receive Unit.
5482  */
5483 int __attribute__((cold))
5484 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5485 {
5486         struct ixgbe_hw     *hw;
5487         struct ixgbe_rx_queue *rxq;
5488         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5489         uint64_t bus_addr;
5490         uint32_t srrctl, psrtype = 0;
5491         uint16_t buf_size;
5492         uint16_t i;
5493         int ret;
5494
5495         PMD_INIT_FUNC_TRACE();
5496         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5497
5498         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5499                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5500                         "it should be a power of 2");
5501                 return -1;
5502         }
5503
5504         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5505                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5506                         "it should be equal to or less than %d",
5507                         hw->mac.max_rx_queues);
5508                 return -1;
5509         }
5510
5511         /*
5512          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5513          * disables packet reception on the VF if the PF MTU is > 1500.
5514          * This is done to deal with an 82599 limitation that forces
5515          * the PF and all VFs to share the same MTU.
5516          * The PF driver then re-enables packet reception on the VF once
5517          * the VF driver issues an IXGBE_VF_SET_LPE request.
5518          * In the meantime, the VF device cannot be used, even if the VF driver
5519          * and the Guest VM network stack are ready to accept packets with a
5520          * size up to the PF MTU.
5521          * As a workaround for this PF behaviour, force the call to
5522          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5523          * VF packet reception works in all cases.
5524          */
5525         ixgbevf_rlpml_set_vf(hw,
5526                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5527
5528         /*
5529          * Assume no header split and no VLAN strip support
5530          * on any Rx queue first.
5531          */
5532         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5533         /* Setup RX queues */
5534         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5535                 rxq = dev->data->rx_queues[i];
5536
5537                 /* Allocate buffers for descriptor rings */
5538                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5539                 if (ret)
5540                         return ret;
5541
5542                 /* Setup the Base and Length of the Rx Descriptor Rings */
5543                 bus_addr = rxq->rx_ring_phys_addr;
5544
5545                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5546                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5547                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5548                                 (uint32_t)(bus_addr >> 32));
5549                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5550                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5551                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5552                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5553
5554
5555                 /* Configure the SRRCTL register */
5556                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5557
5558                 /* Set if packets are dropped when no descriptors available */
5559                 if (rxq->drop_en)
5560                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5561
5562                 /*
5563                  * Configure the RX buffer size in the BSIZEPACKET field of
5564                  * the SRRCTL register of the queue.
5565                  * The value is in 1 KB resolution. Valid values can be from
5566                  * 1 KB to 16 KB.
5567                  */
5568                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5569                         RTE_PKTMBUF_HEADROOM);
5570                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5571                            IXGBE_SRRCTL_BSIZEPKT_MASK);
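                /*
                 * Worked example, assuming the common 2176-byte mbuf data
                 * room with a 128-byte headroom: buf_size = 2048, which in
                 * the 1 KB units of BSIZEPACKET programs 2 KB HW buffers.
                 */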
5572
5573                 /*
5574                  * VF modification to write virtual function SRRCTL register
5575                  */
5576                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5577
5578                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5579                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5580
5581                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5582                     /* Add room for two VLAN tags (QinQ) when checking the frame size */
5583                     (rxmode->max_rx_pkt_len +
5584                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5585                         if (!dev->data->scattered_rx)
5586                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5587                         dev->data->scattered_rx = 1;
5588                 }
5589
5590                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5591                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5592         }
5593
5594         /* Set the RQPL field for VF RSS according to the number of Rx queues */
5595         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5596                 IXGBE_PSRTYPE_RQPL_SHIFT;
5597         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5598
5599         ixgbe_set_rx_function(dev);
5600
5601         return 0;
5602 }
5603
5604 /*
5605  * [VF] Initializes Transmit Unit.
5606  */
5607 void __attribute__((cold))
5608 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5609 {
5610         struct ixgbe_hw     *hw;
5611         struct ixgbe_tx_queue *txq;
5612         uint64_t bus_addr;
5613         uint32_t txctrl;
5614         uint16_t i;
5615
5616         PMD_INIT_FUNC_TRACE();
5617         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5618
5619         /* Setup the Base and Length of the Tx Descriptor Rings */
5620         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5621                 txq = dev->data->tx_queues[i];
5622                 bus_addr = txq->tx_ring_phys_addr;
5623                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5624                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5625                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5626                                 (uint32_t)(bus_addr >> 32));
5627                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5628                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5629                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5630                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5631                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5632
5633                 /*
5634                  * Disable Tx Head Writeback RO bit, since this hoses
5635                  * bookkeeping if things aren't delivered in order.
5636                  */
5637                 txctrl = IXGBE_READ_REG(hw,
5638                                 IXGBE_VFDCA_TXCTRL(i));
5639                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5640                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5641                                 txctrl);
5642         }
5643 }
5644
5645 /*
5646  * [VF] Start Transmit and Receive Units.
5647  */
5648 void __attribute__((cold))
5649 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5650 {
5651         struct ixgbe_hw     *hw;
5652         struct ixgbe_tx_queue *txq;
5653         struct ixgbe_rx_queue *rxq;
5654         uint32_t txdctl;
5655         uint32_t rxdctl;
5656         uint16_t i;
5657         int poll_ms;
5658
5659         PMD_INIT_FUNC_TRACE();
5660         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5661
5662         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5663                 txq = dev->data->tx_queues[i];
5664                 /* Setup Transmit Threshold Registers */
5665                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5666                 txdctl |= txq->pthresh & 0x7F;
5667                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5668                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5669                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5670         }
5671
5672         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5673
5674                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5675                 txdctl |= IXGBE_TXDCTL_ENABLE;
5676                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5677
5678                 poll_ms = 10;
5679                 /* Wait until TX Enable ready */
5680                 do {
5681                         rte_delay_ms(1);
5682                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5683                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5684                 if (!poll_ms)
5685                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5686         }
5687         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5688
5689                 rxq = dev->data->rx_queues[i];
5690
5691                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5692                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5693                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5694
5695                 /* Wait until RX Enable ready */
5696                 poll_ms = 10;
5697                 do {
5698                         rte_delay_ms(1);
5699                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5700                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5701                 if (!poll_ms)
5702                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5703                 rte_wmb();
5704                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5705
5706         }
5707 }
5708
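/*
 * Copy an rte_flow RSS action into the driver's private RSS configuration,
 * duplicating the hash key and queue list into the fixed-size arrays of
 * "out". Returns -EINVAL when either input array would not fit.
 */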
5709 int
5710 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5711                     const struct rte_flow_action_rss *in)
5712 {
5713         if (in->key_len > RTE_DIM(out->key) ||
5714             in->queue_num > RTE_DIM(out->queue))
5715                 return -EINVAL;
5716         out->conf = (struct rte_flow_action_rss){
5717                 .func = in->func,
5718                 .level = in->level,
5719                 .types = in->types,
5720                 .key_len = in->key_len,
5721                 .queue_num = in->queue_num,
5722                 .key = memcpy(out->key, in->key, in->key_len),
5723                 .queue = memcpy(out->queue, in->queue,
5724                                 sizeof(*in->queue) * in->queue_num),
5725         };
5726         return 0;
5727 }
5728
5729 int
5730 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5731                       const struct rte_flow_action_rss *with)
5732 {
5733         return (comp->func == with->func &&
5734                 comp->level == with->level &&
5735                 comp->types == with->types &&
5736                 comp->key_len == with->key_len &&
5737                 comp->queue_num == with->queue_num &&
5738                 !memcmp(comp->key, with->key, with->key_len) &&
5739                 !memcmp(comp->queue, with->queue,
5740                         sizeof(*with->queue) * with->queue_num));
5741 }
5742
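/*
 * Apply (add == true) or remove (add == false) an RSS filter: program the
 * redirection table from the queue list, then the hash key and hash types.
 * Removal only succeeds for a configuration matching the stored one.
 */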
5743 int
5744 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5745                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5746 {
5747         struct ixgbe_hw *hw;
5748         uint32_t reta;
5749         uint16_t i;
5750         uint16_t j;
5751         uint16_t sp_reta_size;
5752         uint32_t reta_reg;
5753         struct rte_eth_rss_conf rss_conf = {
5754                 .rss_key = conf->conf.key_len ?
5755                         (void *)(uintptr_t)conf->conf.key : NULL,
5756                 .rss_key_len = conf->conf.key_len,
5757                 .rss_hf = conf->conf.types,
5758         };
5759         struct ixgbe_filter_info *filter_info =
5760                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5761
5762         PMD_INIT_FUNC_TRACE();
5763         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5764
5765         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5766
5767         if (!add) {
5768                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5769                                           &conf->conf)) {
5770                         ixgbe_rss_disable(dev);
5771                         memset(&filter_info->rss_info, 0,
5772                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5773                         return 0;
5774                 }
5775                 return -EINVAL;
5776         }
5777
5778         if (filter_info->rss_info.conf.queue_num)
5779                 return -EINVAL;
5780         /* Fill in the redirection table.
5781          * The byte-swap is needed because NIC registers are in
5782          * little-endian order.
5783          */
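        /*
         * Each 32-bit RETA register packs four 8-bit queue indexes, so the
         * accumulated value is written out on every 4th entry.
         */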
5784         reta = 0;
5785         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5786                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5787
5788                 if (j == conf->conf.queue_num)
5789                         j = 0;
5790                 reta = (reta << 8) | conf->conf.queue[j];
5791                 if ((i & 3) == 3)
5792                         IXGBE_WRITE_REG(hw, reta_reg,
5793                                         rte_bswap32(reta));
5794         }
5795
5796         /* Configure the RSS key and the RSS protocols used to compute
5797          * the RSS hash of input packets.
5798          */
5799         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5800                 ixgbe_rss_disable(dev);
5801                 return 0;
5802         }
5803         if (rss_conf.rss_key == NULL)
5804                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5805         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5806
5807         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5808                 return -EINVAL;
5809
5810         return 0;
5811 }
5812
5813 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5814 __rte_weak int
5815 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5816 {
5817         return -1;
5818 }
5819
5820 __rte_weak uint16_t
5821 ixgbe_recv_pkts_vec(
5822         void __rte_unused *rx_queue,
5823         struct rte_mbuf __rte_unused **rx_pkts,
5824         uint16_t __rte_unused nb_pkts)
5825 {
5826         return 0;
5827 }
5828
5829 __rte_weak uint16_t
5830 ixgbe_recv_scattered_pkts_vec(
5831         void __rte_unused *rx_queue,
5832         struct rte_mbuf __rte_unused **rx_pkts,
5833         uint16_t __rte_unused nb_pkts)
5834 {
5835         return 0;
5836 }
5837
5838 __rte_weak int
5839 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5840 {
5841         return -1;
5842 }