ce589b97a85c0f19dfab7eb62160d43f645436cf
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45 #include <rte_vect.h>
46
47 #include "ixgbe_logs.h"
48 #include "base/ixgbe_api.h"
49 #include "base/ixgbe_vf.h"
50 #include "ixgbe_ethdev.h"
51 #include "base/ixgbe_dcb.h"
52 #include "base/ixgbe_common.h"
53 #include "ixgbe_rxtx.h"
54
55 #ifdef RTE_LIBRTE_IEEE1588
56 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
57 #else
58 #define IXGBE_TX_IEEE1588_TMST 0
59 #endif
60 /* Bit Mask to indicate what bits required for building TX context */
61 #define IXGBE_TX_OFFLOAD_MASK (                  \
62                 PKT_TX_OUTER_IPV6 |              \
63                 PKT_TX_OUTER_IPV4 |              \
64                 PKT_TX_IPV6 |                    \
65                 PKT_TX_IPV4 |                    \
66                 PKT_TX_VLAN_PKT |                \
67                 PKT_TX_IP_CKSUM |                \
68                 PKT_TX_L4_MASK |                 \
69                 PKT_TX_TCP_SEG |                 \
70                 PKT_TX_MACSEC |                  \
71                 PKT_TX_OUTER_IP_CKSUM |          \
72                 PKT_TX_SEC_OFFLOAD |     \
73                 IXGBE_TX_IEEE1588_TMST)
74
75 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
76                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
77
78 #if 1
79 #define RTE_PMD_USE_PREFETCH
80 #endif
81
82 #ifdef RTE_PMD_USE_PREFETCH
83 /*
84  * Prefetch a cache line into all cache levels.
85  */
86 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
87 #else
88 #define rte_ixgbe_prefetch(p)   do {} while (0)
89 #endif
90
91 /*********************************************************************
92  *
93  *  TX functions
94  *
95  **********************************************************************/
96
97 /*
98  * Check for descriptors with their DD bit set and free mbufs.
99  * Return the total number of buffers freed.
100  */
101 static __rte_always_inline int
102 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
103 {
104         struct ixgbe_tx_entry *txep;
105         uint32_t status;
106         int i, nb_free = 0;
107         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
108
109         /* check DD bit on threshold descriptor */
110         status = txq->tx_ring[txq->tx_next_dd].wb.status;
111         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
112                 return 0;
113
114         /*
115          * first buffer to free from S/W ring is at index
116          * tx_next_dd - (tx_rs_thresh-1)
117          */
118         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
119
120         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
121                 /* free buffers one at a time */
122                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
123                 txep->mbuf = NULL;
124
125                 if (unlikely(m == NULL))
126                         continue;
127
128                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
129                     (nb_free > 0 && m->pool != free[0]->pool)) {
130                         rte_mempool_put_bulk(free[0]->pool,
131                                              (void **)free, nb_free);
132                         nb_free = 0;
133                 }
134
135                 free[nb_free++] = m;
136         }
137
138         if (nb_free > 0)
139                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
140
141         /* buffers were freed, update counters */
142         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
143         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
144         if (txq->tx_next_dd >= txq->nb_tx_desc)
145                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
146
147         return txq->tx_rs_thresh;
148 }
149
150 /* Populate 4 descriptors with data from 4 mbufs */
151 static inline void
152 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
153 {
154         uint64_t buf_dma_addr;
155         uint32_t pkt_len;
156         int i;
157
158         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
159                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
160                 pkt_len = (*pkts)->data_len;
161
162                 /* write data to descriptor */
163                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
164
165                 txdp->read.cmd_type_len =
166                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
167
168                 txdp->read.olinfo_status =
169                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
170
171                 rte_prefetch0(&(*pkts)->pool);
172         }
173 }
174
175 /* Populate 1 descriptor with data from 1 mbuf */
176 static inline void
177 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
178 {
179         uint64_t buf_dma_addr;
180         uint32_t pkt_len;
181
182         buf_dma_addr = rte_mbuf_data_iova(*pkts);
183         pkt_len = (*pkts)->data_len;
184
185         /* write data to descriptor */
186         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187         txdp->read.cmd_type_len =
188                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
189         txdp->read.olinfo_status =
190                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
191         rte_prefetch0(&(*pkts)->pool);
192 }
193
194 /*
195  * Fill H/W descriptor ring with mbuf data.
196  * Copy mbuf pointers to the S/W ring.
197  */
198 static inline void
199 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
200                       uint16_t nb_pkts)
201 {
202         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
203         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
204         const int N_PER_LOOP = 4;
205         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
206         int mainpart, leftover;
207         int i, j;
208
209         /*
210          * Process most of the packets in chunks of N pkts.  Any
211          * leftover packets will get processed one at a time.
212          */
213         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
214         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
215         for (i = 0; i < mainpart; i += N_PER_LOOP) {
216                 /* Copy N mbuf pointers to the S/W ring */
217                 for (j = 0; j < N_PER_LOOP; ++j) {
218                         (txep + i + j)->mbuf = *(pkts + i + j);
219                 }
220                 tx4(txdp + i, pkts + i);
221         }
222
223         if (unlikely(leftover > 0)) {
224                 for (i = 0; i < leftover; ++i) {
225                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
226                         tx1(txdp + mainpart + i, pkts + mainpart + i);
227                 }
228         }
229 }
230
231 static inline uint16_t
232 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
233              uint16_t nb_pkts)
234 {
235         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
236         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
237         uint16_t n = 0;
238
239         /*
240          * Begin scanning the H/W ring for done descriptors when the
241          * number of available descriptors drops below tx_free_thresh.  For
242          * each done descriptor, free the associated buffer.
243          */
244         if (txq->nb_tx_free < txq->tx_free_thresh)
245                 ixgbe_tx_free_bufs(txq);
246
247         /* Only use descriptors that are available */
248         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
249         if (unlikely(nb_pkts == 0))
250                 return 0;
251
252         /* Use exactly nb_pkts descriptors */
253         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
254
255         /*
256          * At this point, we know there are enough descriptors in the
257          * ring to transmit all the packets.  This assumes that each
258          * mbuf contains a single segment, and that no new offloads
259          * are expected, which would require a new context descriptor.
260          */
261
262         /*
263          * See if we're going to wrap-around. If so, handle the top
264          * of the descriptor ring first, then do the bottom.  If not,
265          * the processing looks just like the "bottom" part anyway...
266          */
267         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
268                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
269                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
270
271                 /*
272                  * We know that the last descriptor in the ring will need to
273                  * have its RS bit set because tx_rs_thresh has to be
274                  * a divisor of the ring size
275                  */
276                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
277                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
278                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
279
280                 txq->tx_tail = 0;
281         }
282
283         /* Fill H/W descriptor ring with mbuf data */
284         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
285         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
286
287         /*
288          * Determine if RS bit should be set
289          * This is what we actually want:
290          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
291          * but instead of subtracting 1 and doing >=, we can just do
292          * greater than without subtracting.
293          */
294         if (txq->tx_tail > txq->tx_next_rs) {
295                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
296                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
297                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
298                                                 txq->tx_rs_thresh);
299                 if (txq->tx_next_rs >= txq->nb_tx_desc)
300                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
301         }
302
303         /*
304          * Check for wrap-around. This would only happen if we used
305          * up to the last descriptor in the ring, no more, no less.
306          */
307         if (txq->tx_tail >= txq->nb_tx_desc)
308                 txq->tx_tail = 0;
309
310         /* update tail pointer */
311         rte_wmb();
312         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
313
314         return nb_pkts;
315 }
316
317 uint16_t
318 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
319                        uint16_t nb_pkts)
320 {
321         uint16_t nb_tx;
322
323         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
324         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
325                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
326
327         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
328         nb_tx = 0;
329         while (nb_pkts) {
330                 uint16_t ret, n;
331
332                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
333                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
334                 nb_tx = (uint16_t)(nb_tx + ret);
335                 nb_pkts = (uint16_t)(nb_pkts - ret);
336                 if (ret < n)
337                         break;
338         }
339
340         return nb_tx;
341 }
342
343 static uint16_t
344 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
345                     uint16_t nb_pkts)
346 {
347         uint16_t nb_tx = 0;
348         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
349
350         while (nb_pkts) {
351                 uint16_t ret, num;
352
353                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
354                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
355                                                  num);
356                 nb_tx += ret;
357                 nb_pkts -= ret;
358                 if (ret < num)
359                         break;
360         }
361
362         return nb_tx;
363 }
364
365 static inline void
366 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
367                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
368                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
369                 __rte_unused uint64_t *mdata)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & PKT_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
420                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
421                         tx_offload_mask.l2_len |= ~0;
422                         tx_offload_mask.l3_len |= ~0;
423                         break;
424                 case PKT_TX_TCP_CKSUM:
425                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
426                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
427                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
428                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
429                         tx_offload_mask.l2_len |= ~0;
430                         tx_offload_mask.l3_len |= ~0;
431                         break;
432                 case PKT_TX_SCTP_CKSUM:
433                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
434                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
435                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
436                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
437                         tx_offload_mask.l2_len |= ~0;
438                         tx_offload_mask.l3_len |= ~0;
439                         break;
440                 default:
441                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
442                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
443                         break;
444                 }
445         }
446
447         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
448                 tx_offload_mask.outer_l2_len |= ~0;
449                 tx_offload_mask.outer_l3_len |= ~0;
450                 tx_offload_mask.l2_len |= ~0;
451                 seqnum_seed |= tx_offload.outer_l3_len
452                                << IXGBE_ADVTXD_OUTER_IPLEN;
453                 seqnum_seed |= tx_offload.l2_len
454                                << IXGBE_ADVTXD_TUNNEL_LEN;
455         }
456 #ifdef RTE_LIBRTE_SECURITY
457         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
458                 union ixgbe_crypto_tx_desc_md *md =
459                                 (union ixgbe_crypto_tx_desc_md *)mdata;
460                 seqnum_seed |=
461                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
462                 type_tucmd_mlhl |= md->enc ?
463                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
464                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
465                 type_tucmd_mlhl |=
466                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
467                 tx_offload_mask.sa_idx |= ~0;
468                 tx_offload_mask.sec_pad_len |= ~0;
469         }
470 #endif
471
472         txq->ctx_cache[ctx_idx].flags = ol_flags;
473         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
474                 tx_offload_mask.data[0] & tx_offload.data[0];
475         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
476                 tx_offload_mask.data[1] & tx_offload.data[1];
477         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
478
479         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
480         vlan_macip_lens = tx_offload.l3_len;
481         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
482                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
483                                     IXGBE_ADVTXD_MACLEN_SHIFT);
484         else
485                 vlan_macip_lens |= (tx_offload.l2_len <<
486                                     IXGBE_ADVTXD_MACLEN_SHIFT);
487         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
488         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
489         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
490         ctx_txd->seqnum_seed     = seqnum_seed;
491 }
492
493 /*
494  * Check which hardware context can be used. Use the existing match
495  * or create a new context descriptor.
496  */
497 static inline uint32_t
498 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
499                    union ixgbe_tx_offload tx_offload)
500 {
501         /* If match with the current used context */
502         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
503                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
504                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
505                      & tx_offload.data[0])) &&
506                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
507                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
508                      & tx_offload.data[1]))))
509                 return txq->ctx_curr;
510
511         /* What if match with the next context  */
512         txq->ctx_curr ^= 1;
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* Mismatch, use the previous context */
523         return IXGBE_CTX_NUM;
524 }
525
526 static inline uint32_t
527 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
528 {
529         uint32_t tmp = 0;
530
531         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
532                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
533         if (ol_flags & PKT_TX_IP_CKSUM)
534                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
535         if (ol_flags & PKT_TX_TCP_SEG)
536                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
537         return tmp;
538 }
539
540 static inline uint32_t
541 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
542 {
543         uint32_t cmdtype = 0;
544
545         if (ol_flags & PKT_TX_VLAN_PKT)
546                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
547         if (ol_flags & PKT_TX_TCP_SEG)
548                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
549         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
550                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
551         if (ol_flags & PKT_TX_MACSEC)
552                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
553         return cmdtype;
554 }
555
556 /* Default RS bit threshold values */
557 #ifndef DEFAULT_TX_RS_THRESH
558 #define DEFAULT_TX_RS_THRESH   32
559 #endif
560 #ifndef DEFAULT_TX_FREE_THRESH
561 #define DEFAULT_TX_FREE_THRESH 32
562 #endif
563
564 /* Reset transmit descriptors after they have been used */
565 static inline int
566 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
567 {
568         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
569         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
570         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
571         uint16_t nb_tx_desc = txq->nb_tx_desc;
572         uint16_t desc_to_clean_to;
573         uint16_t nb_tx_to_clean;
574         uint32_t status;
575
576         /* Determine the last descriptor needing to be cleaned */
577         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
578         if (desc_to_clean_to >= nb_tx_desc)
579                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
580
581         /* Check to make sure the last descriptor to clean is done */
582         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
583         status = txr[desc_to_clean_to].wb.status;
584         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
585                 PMD_TX_FREE_LOG(DEBUG,
586                                 "TX descriptor %4u is not done"
587                                 "(port=%d queue=%d)",
588                                 desc_to_clean_to,
589                                 txq->port_id, txq->queue_id);
590                 /* Failed to clean any descriptors, better luck next time */
591                 return -(1);
592         }
593
594         /* Figure out how many descriptors will be cleaned */
595         if (last_desc_cleaned > desc_to_clean_to)
596                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
597                                                         desc_to_clean_to);
598         else
599                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
600                                                 last_desc_cleaned);
601
602         PMD_TX_FREE_LOG(DEBUG,
603                         "Cleaning %4u TX descriptors: %4u to %4u "
604                         "(port=%d queue=%d)",
605                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
606                         txq->port_id, txq->queue_id);
607
608         /*
609          * The last descriptor to clean is done, so that means all the
610          * descriptors from the last descriptor that was cleaned
611          * up to the last descriptor with the RS bit set
612          * are done. Only reset the threshold descriptor.
613          */
614         txr[desc_to_clean_to].wb.status = 0;
615
616         /* Update the txq to reflect the last descriptor that was cleaned */
617         txq->last_desc_cleaned = desc_to_clean_to;
618         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
619
620         /* No Error */
621         return 0;
622 }
623
624 uint16_t
625 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
626                 uint16_t nb_pkts)
627 {
628         struct ixgbe_tx_queue *txq;
629         struct ixgbe_tx_entry *sw_ring;
630         struct ixgbe_tx_entry *txe, *txn;
631         volatile union ixgbe_adv_tx_desc *txr;
632         volatile union ixgbe_adv_tx_desc *txd, *txp;
633         struct rte_mbuf     *tx_pkt;
634         struct rte_mbuf     *m_seg;
635         uint64_t buf_dma_addr;
636         uint32_t olinfo_status;
637         uint32_t cmd_type_len;
638         uint32_t pkt_len;
639         uint16_t slen;
640         uint64_t ol_flags;
641         uint16_t tx_id;
642         uint16_t tx_last;
643         uint16_t nb_tx;
644         uint16_t nb_used;
645         uint64_t tx_ol_req;
646         uint32_t ctx = 0;
647         uint32_t new_ctx;
648         union ixgbe_tx_offload tx_offload;
649 #ifdef RTE_LIBRTE_SECURITY
650         uint8_t use_ipsec;
651 #endif
652
653         tx_offload.data[0] = 0;
654         tx_offload.data[1] = 0;
655         txq = tx_queue;
656         sw_ring = txq->sw_ring;
657         txr     = txq->tx_ring;
658         tx_id   = txq->tx_tail;
659         txe = &sw_ring[tx_id];
660         txp = NULL;
661
662         /* Determine if the descriptor ring needs to be cleaned. */
663         if (txq->nb_tx_free < txq->tx_free_thresh)
664                 ixgbe_xmit_cleanup(txq);
665
666         rte_prefetch0(&txe->mbuf->pool);
667
668         /* TX loop */
669         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
670                 new_ctx = 0;
671                 tx_pkt = *tx_pkts++;
672                 pkt_len = tx_pkt->pkt_len;
673
674                 /*
675                  * Determine how many (if any) context descriptors
676                  * are needed for offload functionality.
677                  */
678                 ol_flags = tx_pkt->ol_flags;
679 #ifdef RTE_LIBRTE_SECURITY
680                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
681 #endif
682
683                 /* If hardware offload required */
684                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
685                 if (tx_ol_req) {
686                         tx_offload.l2_len = tx_pkt->l2_len;
687                         tx_offload.l3_len = tx_pkt->l3_len;
688                         tx_offload.l4_len = tx_pkt->l4_len;
689                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
690                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
691                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
692                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
693 #ifdef RTE_LIBRTE_SECURITY
694                         if (use_ipsec) {
695                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
696                                         (union ixgbe_crypto_tx_desc_md *)
697                                                         &tx_pkt->udata64;
698                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
699                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
700                         }
701 #endif
702
703                         /* If new context need be built or reuse the exist ctx. */
704                         ctx = what_advctx_update(txq, tx_ol_req,
705                                 tx_offload);
706                         /* Only allocate context descriptor if required*/
707                         new_ctx = (ctx == IXGBE_CTX_NUM);
708                         ctx = txq->ctx_curr;
709                 }
710
711                 /*
712                  * Keep track of how many descriptors are used this loop
713                  * This will always be the number of segments + the number of
714                  * Context descriptors required to transmit the packet
715                  */
716                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
717
718                 if (txp != NULL &&
719                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
720                         /* set RS on the previous packet in the burst */
721                         txp->read.cmd_type_len |=
722                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
723
724                 /*
725                  * The number of descriptors that must be allocated for a
726                  * packet is the number of segments of that packet, plus 1
727                  * Context Descriptor for the hardware offload, if any.
728                  * Determine the last TX descriptor to allocate in the TX ring
729                  * for the packet, starting from the current position (tx_id)
730                  * in the ring.
731                  */
732                 tx_last = (uint16_t) (tx_id + nb_used - 1);
733
734                 /* Circular ring */
735                 if (tx_last >= txq->nb_tx_desc)
736                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
737
738                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
739                            " tx_first=%u tx_last=%u",
740                            (unsigned) txq->port_id,
741                            (unsigned) txq->queue_id,
742                            (unsigned) pkt_len,
743                            (unsigned) tx_id,
744                            (unsigned) tx_last);
745
746                 /*
747                  * Make sure there are enough TX descriptors available to
748                  * transmit the entire packet.
749                  * nb_used better be less than or equal to txq->tx_rs_thresh
750                  */
751                 if (nb_used > txq->nb_tx_free) {
752                         PMD_TX_FREE_LOG(DEBUG,
753                                         "Not enough free TX descriptors "
754                                         "nb_used=%4u nb_free=%4u "
755                                         "(port=%d queue=%d)",
756                                         nb_used, txq->nb_tx_free,
757                                         txq->port_id, txq->queue_id);
758
759                         if (ixgbe_xmit_cleanup(txq) != 0) {
760                                 /* Could not clean any descriptors */
761                                 if (nb_tx == 0)
762                                         return 0;
763                                 goto end_of_tx;
764                         }
765
766                         /* nb_used better be <= txq->tx_rs_thresh */
767                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
768                                 PMD_TX_FREE_LOG(DEBUG,
769                                         "The number of descriptors needed to "
770                                         "transmit the packet exceeds the "
771                                         "RS bit threshold. This will impact "
772                                         "performance."
773                                         "nb_used=%4u nb_free=%4u "
774                                         "tx_rs_thresh=%4u. "
775                                         "(port=%d queue=%d)",
776                                         nb_used, txq->nb_tx_free,
777                                         txq->tx_rs_thresh,
778                                         txq->port_id, txq->queue_id);
779                                 /*
780                                  * Loop here until there are enough TX
781                                  * descriptors or until the ring cannot be
782                                  * cleaned.
783                                  */
784                                 while (nb_used > txq->nb_tx_free) {
785                                         if (ixgbe_xmit_cleanup(txq) != 0) {
786                                                 /*
787                                                  * Could not clean any
788                                                  * descriptors
789                                                  */
790                                                 if (nb_tx == 0)
791                                                         return 0;
792                                                 goto end_of_tx;
793                                         }
794                                 }
795                         }
796                 }
797
798                 /*
799                  * By now there are enough free TX descriptors to transmit
800                  * the packet.
801                  */
802
803                 /*
804                  * Set common flags of all TX Data Descriptors.
805                  *
806                  * The following bits must be set in all Data Descriptors:
807                  *   - IXGBE_ADVTXD_DTYP_DATA
808                  *   - IXGBE_ADVTXD_DCMD_DEXT
809                  *
810                  * The following bits must be set in the first Data Descriptor
811                  * and are ignored in the other ones:
812                  *   - IXGBE_ADVTXD_DCMD_IFCS
813                  *   - IXGBE_ADVTXD_MAC_1588
814                  *   - IXGBE_ADVTXD_DCMD_VLE
815                  *
816                  * The following bits must only be set in the last Data
817                  * Descriptor:
818                  *   - IXGBE_TXD_CMD_EOP
819                  *
820                  * The following bits can be set in any Data Descriptor, but
821                  * are only set in the last Data Descriptor:
822                  *   - IXGBE_TXD_CMD_RS
823                  */
824                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
825                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
826
827 #ifdef RTE_LIBRTE_IEEE1588
828                 if (ol_flags & PKT_TX_IEEE1588_TMST)
829                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
830 #endif
831
832                 olinfo_status = 0;
833                 if (tx_ol_req) {
834
835                         if (ol_flags & PKT_TX_TCP_SEG) {
836                                 /* when TSO is on, paylen in descriptor is the
837                                  * not the packet len but the tcp payload len */
838                                 pkt_len -= (tx_offload.l2_len +
839                                         tx_offload.l3_len + tx_offload.l4_len);
840                         }
841
842                         /*
843                          * Setup the TX Advanced Context Descriptor if required
844                          */
845                         if (new_ctx) {
846                                 volatile struct ixgbe_adv_tx_context_desc *
847                                     ctx_txd;
848
849                                 ctx_txd = (volatile struct
850                                     ixgbe_adv_tx_context_desc *)
851                                     &txr[tx_id];
852
853                                 txn = &sw_ring[txe->next_id];
854                                 rte_prefetch0(&txn->mbuf->pool);
855
856                                 if (txe->mbuf != NULL) {
857                                         rte_pktmbuf_free_seg(txe->mbuf);
858                                         txe->mbuf = NULL;
859                                 }
860
861                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
862                                         tx_offload, &tx_pkt->udata64);
863
864                                 txe->last_id = tx_last;
865                                 tx_id = txe->next_id;
866                                 txe = txn;
867                         }
868
869                         /*
870                          * Setup the TX Advanced Data Descriptor,
871                          * This path will go through
872                          * whatever new/reuse the context descriptor
873                          */
874                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
875                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
876                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
877                 }
878
879                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
880 #ifdef RTE_LIBRTE_SECURITY
881                 if (use_ipsec)
882                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
883 #endif
884
885                 m_seg = tx_pkt;
886                 do {
887                         txd = &txr[tx_id];
888                         txn = &sw_ring[txe->next_id];
889                         rte_prefetch0(&txn->mbuf->pool);
890
891                         if (txe->mbuf != NULL)
892                                 rte_pktmbuf_free_seg(txe->mbuf);
893                         txe->mbuf = m_seg;
894
895                         /*
896                          * Set up Transmit Data Descriptor.
897                          */
898                         slen = m_seg->data_len;
899                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
900                         txd->read.buffer_addr =
901                                 rte_cpu_to_le_64(buf_dma_addr);
902                         txd->read.cmd_type_len =
903                                 rte_cpu_to_le_32(cmd_type_len | slen);
904                         txd->read.olinfo_status =
905                                 rte_cpu_to_le_32(olinfo_status);
906                         txe->last_id = tx_last;
907                         tx_id = txe->next_id;
908                         txe = txn;
909                         m_seg = m_seg->next;
910                 } while (m_seg != NULL);
911
912                 /*
913                  * The last packet data descriptor needs End Of Packet (EOP)
914                  */
915                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
916                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
917                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
918
919                 /* Set RS bit only on threshold packets' last descriptor */
920                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
921                         PMD_TX_FREE_LOG(DEBUG,
922                                         "Setting RS bit on TXD id="
923                                         "%4u (port=%d queue=%d)",
924                                         tx_last, txq->port_id, txq->queue_id);
925
926                         cmd_type_len |= IXGBE_TXD_CMD_RS;
927
928                         /* Update txq RS bit counters */
929                         txq->nb_tx_used = 0;
930                         txp = NULL;
931                 } else
932                         txp = txd;
933
934                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
935         }
936
937 end_of_tx:
938         /* set RS on last packet in the burst */
939         if (txp != NULL)
940                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
941
942         rte_wmb();
943
944         /*
945          * Set the Transmit Descriptor Tail (TDT)
946          */
947         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
948                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
949                    (unsigned) tx_id, (unsigned) nb_tx);
950         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
951         txq->tx_tail = tx_id;
952
953         return nb_tx;
954 }
955
956 /*********************************************************************
957  *
958  *  TX prep functions
959  *
960  **********************************************************************/
961 uint16_t
962 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
963 {
964         int i, ret;
965         uint64_t ol_flags;
966         struct rte_mbuf *m;
967         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
968
969         for (i = 0; i < nb_pkts; i++) {
970                 m = tx_pkts[i];
971                 ol_flags = m->ol_flags;
972
973                 /**
974                  * Check if packet meets requirements for number of segments
975                  *
976                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
977                  *       non-TSO
978                  */
979
980                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
981                         rte_errno = EINVAL;
982                         return i;
983                 }
984
985                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
986                         rte_errno = ENOTSUP;
987                         return i;
988                 }
989
990                 /* check the size of packet */
991                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
992                         rte_errno = EINVAL;
993                         return i;
994                 }
995
996 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
997                 ret = rte_validate_tx_offload(m);
998                 if (ret != 0) {
999                         rte_errno = -ret;
1000                         return i;
1001                 }
1002 #endif
1003                 ret = rte_net_intel_cksum_prepare(m);
1004                 if (ret != 0) {
1005                         rte_errno = -ret;
1006                         return i;
1007                 }
1008         }
1009
1010         return i;
1011 }
1012
1013 /*********************************************************************
1014  *
1015  *  RX functions
1016  *
1017  **********************************************************************/
1018
1019 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1020 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1021 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1022 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1023 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1028 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1029 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1030 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1031 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1052
1053 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1076
1077 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1100
1101 /**
1102  * Use 2 different table for normal packet and tunnel packet
1103  * to save the space.
1104  */
1105 const uint32_t
1106         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1107         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1108         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1109                 RTE_PTYPE_L3_IPV4,
1110         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1112         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1114         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1116         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4_EXT,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1124         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV6,
1126         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1128         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1130         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1132         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6_EXT,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1140         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1142                 RTE_PTYPE_INNER_L3_IPV6,
1143         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1144                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1145                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1146         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1148         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1149         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1150                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1151                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1152         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1153                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1154                 RTE_PTYPE_INNER_L3_IPV6,
1155         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1156                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1157                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1158         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1159                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1160                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1161         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1162                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1163                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1164         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1165                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1166                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1167         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1168                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1169                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1170         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1171                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1172                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1173         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1174                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1175                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1176         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1178                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1179         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1181                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1182         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1183                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1184                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1185         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1186                 RTE_PTYPE_L2_ETHER |
1187                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1188                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1189 };
1190
1191 const uint32_t
1192         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1193         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1194                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                 RTE_PTYPE_INNER_L2_ETHER,
1196         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1197                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1199         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1200                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1201                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1202         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1203                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1205         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1206                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1208         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1214         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1215                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1216                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1217                 RTE_PTYPE_INNER_L4_TCP,
1218         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1219                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1220                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1221                 RTE_PTYPE_INNER_L4_TCP,
1222         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1223                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1224                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1226                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1228                 RTE_PTYPE_INNER_L4_TCP,
1229         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1230                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1231                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1232                 RTE_PTYPE_INNER_L3_IPV4,
1233         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1234                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1236                 RTE_PTYPE_INNER_L4_UDP,
1237         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1240                 RTE_PTYPE_INNER_L4_UDP,
1241         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1242                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1243                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1244                 RTE_PTYPE_INNER_L4_SCTP,
1245         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1246                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1248         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1249                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1250                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1251                 RTE_PTYPE_INNER_L4_UDP,
1252         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1253                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1254                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1255                 RTE_PTYPE_INNER_L4_SCTP,
1256         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1257                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1258                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1259                 RTE_PTYPE_INNER_L3_IPV4,
1260         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1261                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1262                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1263                 RTE_PTYPE_INNER_L4_SCTP,
1264         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1265                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1266                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1267                 RTE_PTYPE_INNER_L4_SCTP,
1268         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1269                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1270                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1271                 RTE_PTYPE_INNER_L4_TCP,
1272         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1273                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1274                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1275                 RTE_PTYPE_INNER_L4_UDP,
1276
1277         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1278                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1280         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1281                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1282                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1283                 RTE_PTYPE_INNER_L3_IPV4,
1284         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1285                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1286                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1287                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1288         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1289                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1290                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1291                 RTE_PTYPE_INNER_L3_IPV6,
1292         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1293                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1294                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1295                 RTE_PTYPE_INNER_L3_IPV4,
1296         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1297                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1298                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1299                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1300         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1301                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1302                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1303                 RTE_PTYPE_INNER_L3_IPV4,
1304         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1305                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1306                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1307                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1308         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1309                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1310                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1311                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1312         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1313                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1314                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1315                 RTE_PTYPE_INNER_L3_IPV4,
1316         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1317                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1318                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1319                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1320         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1321                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1322                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1323                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1324         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1325                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1326                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1327                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1328         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1329                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1330                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1331                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1332         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1333                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1334                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1335                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1336         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1337                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1338                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1339                 RTE_PTYPE_INNER_L3_IPV4,
1340         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1341                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1342                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1343                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1344         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1345                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1346                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1347                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1348         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1349                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1350                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1351                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1352         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1353                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1354                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1355                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1356         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1357                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1358                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1359                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1360         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1361                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1362                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1363                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1364         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1365                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1366                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1367                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1368 };
1369
1370 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1371 static inline uint32_t
1372 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1373 {
1374
1375         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1376                 return RTE_PTYPE_UNKNOWN;
1377
1378         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1379
1380         /* For tunnel packet */
1381         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1382                 /* Remove the tunnel bit to save the space. */
1383                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1384                 return ptype_table_tn[pkt_info];
1385         }
1386
1387         /**
1388          * For x550, if it's not tunnel,
1389          * tunnel type bit should be set to 0.
1390          * Reuse 82599's mask.
1391          */
1392         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1393
1394         return ptype_table[pkt_info];
1395 }
1396
1397 static inline uint64_t
1398 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1399 {
1400         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1401                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1402                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1403                 PKT_RX_RSS_HASH, 0, 0, 0,
1404                 0, 0, 0,  PKT_RX_FDIR,
1405         };
1406 #ifdef RTE_LIBRTE_IEEE1588
1407         static uint64_t ip_pkt_etqf_map[8] = {
1408                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1409                 0, 0, 0, 0,
1410         };
1411
1412         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1413                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1414                                 ip_rss_types_map[pkt_info & 0XF];
1415         else
1416                 return ip_rss_types_map[pkt_info & 0XF];
1417 #else
1418         return ip_rss_types_map[pkt_info & 0XF];
1419 #endif
1420 }
1421
1422 static inline uint64_t
1423 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1424 {
1425         uint64_t pkt_flags;
1426
1427         /*
1428          * Check if VLAN present only.
1429          * Do not check whether L3/L4 rx checksum done by NIC or not,
1430          * That can be found from rte_eth_rxmode.offloads flag
1431          */
1432         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1433
1434 #ifdef RTE_LIBRTE_IEEE1588
1435         if (rx_status & IXGBE_RXD_STAT_TMST)
1436                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1437 #endif
1438         return pkt_flags;
1439 }
1440
1441 static inline uint64_t
1442 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1443 {
1444         uint64_t pkt_flags;
1445
1446         /*
1447          * Bit 31: IPE, IPv4 checksum error
1448          * Bit 30: L4I, L4I integrity error
1449          */
1450         static uint64_t error_to_pkt_flags_map[4] = {
1451                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1452                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1453                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1454                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1455         };
1456         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1457                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1458
1459         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1460             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1461                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1462         }
1463
1464 #ifdef RTE_LIBRTE_SECURITY
1465         if (rx_status & IXGBE_RXD_STAT_SECP) {
1466                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1467                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1468                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1469         }
1470 #endif
1471
1472         return pkt_flags;
1473 }
1474
1475 /*
1476  * LOOK_AHEAD defines how many desc statuses to check beyond the
1477  * current descriptor.
1478  * It must be a pound define for optimal performance.
1479  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1480  * function only works with LOOK_AHEAD=8.
1481  */
1482 #define LOOK_AHEAD 8
1483 #if (LOOK_AHEAD != 8)
1484 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1485 #endif
1486 static inline int
1487 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1488 {
1489         volatile union ixgbe_adv_rx_desc *rxdp;
1490         struct ixgbe_rx_entry *rxep;
1491         struct rte_mbuf *mb;
1492         uint16_t pkt_len;
1493         uint64_t pkt_flags;
1494         int nb_dd;
1495         uint32_t s[LOOK_AHEAD];
1496         uint32_t pkt_info[LOOK_AHEAD];
1497         int i, j, nb_rx = 0;
1498         uint32_t status;
1499         uint64_t vlan_flags = rxq->vlan_flags;
1500
1501         /* get references to current descriptor and S/W ring entry */
1502         rxdp = &rxq->rx_ring[rxq->rx_tail];
1503         rxep = &rxq->sw_ring[rxq->rx_tail];
1504
1505         status = rxdp->wb.upper.status_error;
1506         /* check to make sure there is at least 1 packet to receive */
1507         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1508                 return 0;
1509
1510         /*
1511          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1512          * reference packets that are ready to be received.
1513          */
1514         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1515              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1516                 /* Read desc statuses backwards to avoid race condition */
1517                 for (j = 0; j < LOOK_AHEAD; j++)
1518                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1519
1520                 rte_smp_rmb();
1521
1522                 /* Compute how many status bits were set */
1523                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1524                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1525                         ;
1526
1527                 for (j = 0; j < nb_dd; j++)
1528                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1529                                                        lo_dword.data);
1530
1531                 nb_rx += nb_dd;
1532
1533                 /* Translate descriptor info to mbuf format */
1534                 for (j = 0; j < nb_dd; ++j) {
1535                         mb = rxep[j].mbuf;
1536                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1537                                   rxq->crc_len;
1538                         mb->data_len = pkt_len;
1539                         mb->pkt_len = pkt_len;
1540                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1541
1542                         /* convert descriptor fields to rte mbuf flags */
1543                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1544                                 vlan_flags);
1545                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1546                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1547                                         ((uint16_t)pkt_info[j]);
1548                         mb->ol_flags = pkt_flags;
1549                         mb->packet_type =
1550                                 ixgbe_rxd_pkt_info_to_pkt_type
1551                                         (pkt_info[j], rxq->pkt_type_mask);
1552
1553                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1554                                 mb->hash.rss = rte_le_to_cpu_32(
1555                                     rxdp[j].wb.lower.hi_dword.rss);
1556                         else if (pkt_flags & PKT_RX_FDIR) {
1557                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1558                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1559                                     IXGBE_ATR_HASH_MASK;
1560                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1561                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1562                         }
1563                 }
1564
1565                 /* Move mbuf pointers from the S/W ring to the stage */
1566                 for (j = 0; j < LOOK_AHEAD; ++j) {
1567                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1568                 }
1569
1570                 /* stop if all requested packets could not be received */
1571                 if (nb_dd != LOOK_AHEAD)
1572                         break;
1573         }
1574
1575         /* clear software ring entries so we can cleanup correctly */
1576         for (i = 0; i < nb_rx; ++i) {
1577                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1578         }
1579
1580
1581         return nb_rx;
1582 }
1583
1584 static inline int
1585 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1586 {
1587         volatile union ixgbe_adv_rx_desc *rxdp;
1588         struct ixgbe_rx_entry *rxep;
1589         struct rte_mbuf *mb;
1590         uint16_t alloc_idx;
1591         __le64 dma_addr;
1592         int diag, i;
1593
1594         /* allocate buffers in bulk directly into the S/W ring */
1595         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1596         rxep = &rxq->sw_ring[alloc_idx];
1597         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1598                                     rxq->rx_free_thresh);
1599         if (unlikely(diag != 0))
1600                 return -ENOMEM;
1601
1602         rxdp = &rxq->rx_ring[alloc_idx];
1603         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1604                 /* populate the static rte mbuf fields */
1605                 mb = rxep[i].mbuf;
1606                 if (reset_mbuf) {
1607                         mb->port = rxq->port_id;
1608                 }
1609
1610                 rte_mbuf_refcnt_set(mb, 1);
1611                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1612
1613                 /* populate the descriptors */
1614                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1615                 rxdp[i].read.hdr_addr = 0;
1616                 rxdp[i].read.pkt_addr = dma_addr;
1617         }
1618
1619         /* update state of internal queue structure */
1620         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1621         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1622                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1623
1624         /* no errors */
1625         return 0;
1626 }
1627
1628 static inline uint16_t
1629 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1630                          uint16_t nb_pkts)
1631 {
1632         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1633         int i;
1634
1635         /* how many packets are ready to return? */
1636         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1637
1638         /* copy mbuf pointers to the application's packet list */
1639         for (i = 0; i < nb_pkts; ++i)
1640                 rx_pkts[i] = stage[i];
1641
1642         /* update internal queue state */
1643         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1644         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1645
1646         return nb_pkts;
1647 }
1648
1649 static inline uint16_t
1650 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1651              uint16_t nb_pkts)
1652 {
1653         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1654         uint16_t nb_rx = 0;
1655
1656         /* Any previously recv'd pkts will be returned from the Rx stage */
1657         if (rxq->rx_nb_avail)
1658                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1659
1660         /* Scan the H/W ring for packets to receive */
1661         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1662
1663         /* update internal queue state */
1664         rxq->rx_next_avail = 0;
1665         rxq->rx_nb_avail = nb_rx;
1666         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1667
1668         /* if required, allocate new buffers to replenish descriptors */
1669         if (rxq->rx_tail > rxq->rx_free_trigger) {
1670                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1671
1672                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1673                         int i, j;
1674
1675                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1676                                    "queue_id=%u", (unsigned) rxq->port_id,
1677                                    (unsigned) rxq->queue_id);
1678
1679                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1680                                 rxq->rx_free_thresh;
1681
1682                         /*
1683                          * Need to rewind any previous receives if we cannot
1684                          * allocate new buffers to replenish the old ones.
1685                          */
1686                         rxq->rx_nb_avail = 0;
1687                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1688                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1689                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1690
1691                         return 0;
1692                 }
1693
1694                 /* update tail pointer */
1695                 rte_wmb();
1696                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1697                                             cur_free_trigger);
1698         }
1699
1700         if (rxq->rx_tail >= rxq->nb_rx_desc)
1701                 rxq->rx_tail = 0;
1702
1703         /* received any packets this loop? */
1704         if (rxq->rx_nb_avail)
1705                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1706
1707         return 0;
1708 }
1709
1710 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1711 uint16_t
1712 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1713                            uint16_t nb_pkts)
1714 {
1715         uint16_t nb_rx;
1716
1717         if (unlikely(nb_pkts == 0))
1718                 return 0;
1719
1720         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1721                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1722
1723         /* request is relatively large, chunk it up */
1724         nb_rx = 0;
1725         while (nb_pkts) {
1726                 uint16_t ret, n;
1727
1728                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1729                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1730                 nb_rx = (uint16_t)(nb_rx + ret);
1731                 nb_pkts = (uint16_t)(nb_pkts - ret);
1732                 if (ret < n)
1733                         break;
1734         }
1735
1736         return nb_rx;
1737 }
1738
1739 uint16_t
1740 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1741                 uint16_t nb_pkts)
1742 {
1743         struct ixgbe_rx_queue *rxq;
1744         volatile union ixgbe_adv_rx_desc *rx_ring;
1745         volatile union ixgbe_adv_rx_desc *rxdp;
1746         struct ixgbe_rx_entry *sw_ring;
1747         struct ixgbe_rx_entry *rxe;
1748         struct rte_mbuf *rxm;
1749         struct rte_mbuf *nmb;
1750         union ixgbe_adv_rx_desc rxd;
1751         uint64_t dma_addr;
1752         uint32_t staterr;
1753         uint32_t pkt_info;
1754         uint16_t pkt_len;
1755         uint16_t rx_id;
1756         uint16_t nb_rx;
1757         uint16_t nb_hold;
1758         uint64_t pkt_flags;
1759         uint64_t vlan_flags;
1760
1761         nb_rx = 0;
1762         nb_hold = 0;
1763         rxq = rx_queue;
1764         rx_id = rxq->rx_tail;
1765         rx_ring = rxq->rx_ring;
1766         sw_ring = rxq->sw_ring;
1767         vlan_flags = rxq->vlan_flags;
1768         while (nb_rx < nb_pkts) {
1769                 /*
1770                  * The order of operations here is important as the DD status
1771                  * bit must not be read after any other descriptor fields.
1772                  * rx_ring and rxdp are pointing to volatile data so the order
1773                  * of accesses cannot be reordered by the compiler. If they were
1774                  * not volatile, they could be reordered which could lead to
1775                  * using invalid descriptor fields when read from rxd.
1776                  */
1777                 rxdp = &rx_ring[rx_id];
1778                 staterr = rxdp->wb.upper.status_error;
1779                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1780                         break;
1781                 rxd = *rxdp;
1782
1783                 /*
1784                  * End of packet.
1785                  *
1786                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1787                  * is likely to be invalid and to be dropped by the various
1788                  * validation checks performed by the network stack.
1789                  *
1790                  * Allocate a new mbuf to replenish the RX ring descriptor.
1791                  * If the allocation fails:
1792                  *    - arrange for that RX descriptor to be the first one
1793                  *      being parsed the next time the receive function is
1794                  *      invoked [on the same queue].
1795                  *
1796                  *    - Stop parsing the RX ring and return immediately.
1797                  *
1798                  * This policy do not drop the packet received in the RX
1799                  * descriptor for which the allocation of a new mbuf failed.
1800                  * Thus, it allows that packet to be later retrieved if
1801                  * mbuf have been freed in the mean time.
1802                  * As a side effect, holding RX descriptors instead of
1803                  * systematically giving them back to the NIC may lead to
1804                  * RX ring exhaustion situations.
1805                  * However, the NIC can gracefully prevent such situations
1806                  * to happen by sending specific "back-pressure" flow control
1807                  * frames to its peer(s).
1808                  */
1809                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1810                            "ext_err_stat=0x%08x pkt_len=%u",
1811                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1812                            (unsigned) rx_id, (unsigned) staterr,
1813                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1814
1815                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1816                 if (nmb == NULL) {
1817                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1818                                    "queue_id=%u", (unsigned) rxq->port_id,
1819                                    (unsigned) rxq->queue_id);
1820                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1821                         break;
1822                 }
1823
1824                 nb_hold++;
1825                 rxe = &sw_ring[rx_id];
1826                 rx_id++;
1827                 if (rx_id == rxq->nb_rx_desc)
1828                         rx_id = 0;
1829
1830                 /* Prefetch next mbuf while processing current one. */
1831                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1832
1833                 /*
1834                  * When next RX descriptor is on a cache-line boundary,
1835                  * prefetch the next 4 RX descriptors and the next 8 pointers
1836                  * to mbufs.
1837                  */
1838                 if ((rx_id & 0x3) == 0) {
1839                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1840                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1841                 }
1842
1843                 rxm = rxe->mbuf;
1844                 rxe->mbuf = nmb;
1845                 dma_addr =
1846                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1847                 rxdp->read.hdr_addr = 0;
1848                 rxdp->read.pkt_addr = dma_addr;
1849
1850                 /*
1851                  * Initialize the returned mbuf.
1852                  * 1) setup generic mbuf fields:
1853                  *    - number of segments,
1854                  *    - next segment,
1855                  *    - packet length,
1856                  *    - RX port identifier.
1857                  * 2) integrate hardware offload data, if any:
1858                  *    - RSS flag & hash,
1859                  *    - IP checksum flag,
1860                  *    - VLAN TCI, if any,
1861                  *    - error flags.
1862                  */
1863                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1864                                       rxq->crc_len);
1865                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1866                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1867                 rxm->nb_segs = 1;
1868                 rxm->next = NULL;
1869                 rxm->pkt_len = pkt_len;
1870                 rxm->data_len = pkt_len;
1871                 rxm->port = rxq->port_id;
1872
1873                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1874                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1875                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1876
1877                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1878                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1879                 pkt_flags = pkt_flags |
1880                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1881                 rxm->ol_flags = pkt_flags;
1882                 rxm->packet_type =
1883                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1884                                                        rxq->pkt_type_mask);
1885
1886                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1887                         rxm->hash.rss = rte_le_to_cpu_32(
1888                                                 rxd.wb.lower.hi_dword.rss);
1889                 else if (pkt_flags & PKT_RX_FDIR) {
1890                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1891                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1892                                         IXGBE_ATR_HASH_MASK;
1893                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1894                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1895                 }
1896                 /*
1897                  * Store the mbuf address into the next entry of the array
1898                  * of returned packets.
1899                  */
1900                 rx_pkts[nb_rx++] = rxm;
1901         }
1902         rxq->rx_tail = rx_id;
1903
1904         /*
1905          * If the number of free RX descriptors is greater than the RX free
1906          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1907          * register.
1908          * Update the RDT with the value of the last processed RX descriptor
1909          * minus 1, to guarantee that the RDT register is never equal to the
1910          * RDH register, which creates a "full" ring situtation from the
1911          * hardware point of view...
1912          */
1913         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1914         if (nb_hold > rxq->rx_free_thresh) {
1915                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1916                            "nb_hold=%u nb_rx=%u",
1917                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1918                            (unsigned) rx_id, (unsigned) nb_hold,
1919                            (unsigned) nb_rx);
1920                 rx_id = (uint16_t) ((rx_id == 0) ?
1921                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1922                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1923                 nb_hold = 0;
1924         }
1925         rxq->nb_rx_hold = nb_hold;
1926         return nb_rx;
1927 }
1928
1929 /**
1930  * Detect an RSC descriptor.
1931  */
1932 static inline uint32_t
1933 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1934 {
1935         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1936                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1937 }
1938
1939 /**
1940  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1941  *
1942  * Fill the following info in the HEAD buffer of the Rx cluster:
1943  *    - RX port identifier
1944  *    - hardware offload data, if any:
1945  *      - RSS flag & hash
1946  *      - IP checksum flag
1947  *      - VLAN TCI, if any
1948  *      - error flags
1949  * @head HEAD of the packet cluster
1950  * @desc HW descriptor to get data from
1951  * @rxq Pointer to the Rx queue
1952  */
1953 static inline void
1954 ixgbe_fill_cluster_head_buf(
1955         struct rte_mbuf *head,
1956         union ixgbe_adv_rx_desc *desc,
1957         struct ixgbe_rx_queue *rxq,
1958         uint32_t staterr)
1959 {
1960         uint32_t pkt_info;
1961         uint64_t pkt_flags;
1962
1963         head->port = rxq->port_id;
1964
1965         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1966          * set in the pkt_flags field.
1967          */
1968         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1969         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1970         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1971         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1972         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1973         head->ol_flags = pkt_flags;
1974         head->packet_type =
1975                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1976
1977         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1978                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1979         else if (pkt_flags & PKT_RX_FDIR) {
1980                 head->hash.fdir.hash =
1981                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1982                                                           & IXGBE_ATR_HASH_MASK;
1983                 head->hash.fdir.id =
1984                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1985         }
1986 }
1987
1988 /**
1989  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1990  *
1991  * @rx_queue Rx queue handle
1992  * @rx_pkts table of received packets
1993  * @nb_pkts size of rx_pkts table
1994  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1995  *
1996  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1997  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1998  *
1999  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2000  * 1) When non-EOP RSC completion arrives:
2001  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2002  *       segment's data length.
2003  *    b) Set the "next" pointer of the current segment to point to the segment
2004  *       at the NEXTP index.
2005  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2006  *       in the sw_rsc_ring.
2007  * 2) When EOP arrives we just update the cluster's total length and offload
2008  *    flags and deliver the cluster up to the upper layers. In our case - put it
2009  *    in the rx_pkts table.
2010  *
2011  * Returns the number of received packets/clusters (according to the "bulk
2012  * receive" interface).
2013  */
2014 static inline uint16_t
2015 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2016                     bool bulk_alloc)
2017 {
2018         struct ixgbe_rx_queue *rxq = rx_queue;
2019         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2020         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2021         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2022         uint16_t rx_id = rxq->rx_tail;
2023         uint16_t nb_rx = 0;
2024         uint16_t nb_hold = rxq->nb_rx_hold;
2025         uint16_t prev_id = rxq->rx_tail;
2026
2027         while (nb_rx < nb_pkts) {
2028                 bool eop;
2029                 struct ixgbe_rx_entry *rxe;
2030                 struct ixgbe_scattered_rx_entry *sc_entry;
2031                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2032                 struct ixgbe_rx_entry *next_rxe = NULL;
2033                 struct rte_mbuf *first_seg;
2034                 struct rte_mbuf *rxm;
2035                 struct rte_mbuf *nmb = NULL;
2036                 union ixgbe_adv_rx_desc rxd;
2037                 uint16_t data_len;
2038                 uint16_t next_id;
2039                 volatile union ixgbe_adv_rx_desc *rxdp;
2040                 uint32_t staterr;
2041
2042 next_desc:
2043                 /*
2044                  * The code in this whole file uses the volatile pointer to
2045                  * ensure the read ordering of the status and the rest of the
2046                  * descriptor fields (on the compiler level only!!!). This is so
2047                  * UGLY - why not to just use the compiler barrier instead? DPDK
2048                  * even has the rte_compiler_barrier() for that.
2049                  *
2050                  * But most importantly this is just wrong because this doesn't
2051                  * ensure memory ordering in a general case at all. For
2052                  * instance, DPDK is supposed to work on Power CPUs where
2053                  * compiler barrier may just not be enough!
2054                  *
2055                  * I tried to write only this function properly to have a
2056                  * starting point (as a part of an LRO/RSC series) but the
2057                  * compiler cursed at me when I tried to cast away the
2058                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2059                  * keeping it the way it is for now.
2060                  *
2061                  * The code in this file is broken in so many other places and
2062                  * will just not work on a big endian CPU anyway therefore the
2063                  * lines below will have to be revisited together with the rest
2064                  * of the ixgbe PMD.
2065                  *
2066                  * TODO:
2067                  *    - Get rid of "volatile" and let the compiler do its job.
2068                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2069                  *      memory ordering below.
2070                  */
2071                 rxdp = &rx_ring[rx_id];
2072                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2073
2074                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2075                         break;
2076
2077                 rxd = *rxdp;
2078
2079                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2080                                   "staterr=0x%x data_len=%u",
2081                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2082                            rte_le_to_cpu_16(rxd.wb.upper.length));
2083
2084                 if (!bulk_alloc) {
2085                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2086                         if (nmb == NULL) {
2087                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2088                                                   "port_id=%u queue_id=%u",
2089                                            rxq->port_id, rxq->queue_id);
2090
2091                                 rte_eth_devices[rxq->port_id].data->
2092                                                         rx_mbuf_alloc_failed++;
2093                                 break;
2094                         }
2095                 } else if (nb_hold > rxq->rx_free_thresh) {
2096                         uint16_t next_rdt = rxq->rx_free_trigger;
2097
2098                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2099                                 rte_wmb();
2100                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2101                                                         rxq->rdt_reg_addr,
2102                                                         next_rdt);
2103                                 nb_hold -= rxq->rx_free_thresh;
2104                         } else {
2105                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2106                                                   "port_id=%u queue_id=%u",
2107                                            rxq->port_id, rxq->queue_id);
2108
2109                                 rte_eth_devices[rxq->port_id].data->
2110                                                         rx_mbuf_alloc_failed++;
2111                                 break;
2112                         }
2113                 }
2114
2115                 nb_hold++;
2116                 rxe = &sw_ring[rx_id];
2117                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2118
2119                 next_id = rx_id + 1;
2120                 if (next_id == rxq->nb_rx_desc)
2121                         next_id = 0;
2122
2123                 /* Prefetch next mbuf while processing current one. */
2124                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2125
2126                 /*
2127                  * When next RX descriptor is on a cache-line boundary,
2128                  * prefetch the next 4 RX descriptors and the next 4 pointers
2129                  * to mbufs.
2130                  */
2131                 if ((next_id & 0x3) == 0) {
2132                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2133                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2134                 }
2135
2136                 rxm = rxe->mbuf;
2137
2138                 if (!bulk_alloc) {
2139                         __le64 dma =
2140                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2141                         /*
2142                          * Update RX descriptor with the physical address of the
2143                          * new data buffer of the new allocated mbuf.
2144                          */
2145                         rxe->mbuf = nmb;
2146
2147                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2148                         rxdp->read.hdr_addr = 0;
2149                         rxdp->read.pkt_addr = dma;
2150                 } else
2151                         rxe->mbuf = NULL;
2152
2153                 /*
2154                  * Set data length & data buffer address of mbuf.
2155                  */
2156                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2157                 rxm->data_len = data_len;
2158
2159                 if (!eop) {
2160                         uint16_t nextp_id;
2161                         /*
2162                          * Get next descriptor index:
2163                          *  - For RSC it's in the NEXTP field.
2164                          *  - For a scattered packet - it's just a following
2165                          *    descriptor.
2166                          */
2167                         if (ixgbe_rsc_count(&rxd))
2168                                 nextp_id =
2169                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2170                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2171                         else
2172                                 nextp_id = next_id;
2173
2174                         next_sc_entry = &sw_sc_ring[nextp_id];
2175                         next_rxe = &sw_ring[nextp_id];
2176                         rte_ixgbe_prefetch(next_rxe);
2177                 }
2178
2179                 sc_entry = &sw_sc_ring[rx_id];
2180                 first_seg = sc_entry->fbuf;
2181                 sc_entry->fbuf = NULL;
2182
2183                 /*
2184                  * If this is the first buffer of the received packet,
2185                  * set the pointer to the first mbuf of the packet and
2186                  * initialize its context.
2187                  * Otherwise, update the total length and the number of segments
2188                  * of the current scattered packet, and update the pointer to
2189                  * the last mbuf of the current packet.
2190                  */
2191                 if (first_seg == NULL) {
2192                         first_seg = rxm;
2193                         first_seg->pkt_len = data_len;
2194                         first_seg->nb_segs = 1;
2195                 } else {
2196                         first_seg->pkt_len += data_len;
2197                         first_seg->nb_segs++;
2198                 }
2199
2200                 prev_id = rx_id;
2201                 rx_id = next_id;
2202
2203                 /*
2204                  * If this is not the last buffer of the received packet, update
2205                  * the pointer to the first mbuf at the NEXTP entry in the
2206                  * sw_sc_ring and continue to parse the RX ring.
2207                  */
2208                 if (!eop && next_rxe) {
2209                         rxm->next = next_rxe->mbuf;
2210                         next_sc_entry->fbuf = first_seg;
2211                         goto next_desc;
2212                 }
2213
2214                 /* Initialize the first mbuf of the returned packet */
2215                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2216
2217                 /*
2218                  * Deal with the case, when HW CRC srip is disabled.
2219                  * That can't happen when LRO is enabled, but still could
2220                  * happen for scattered RX mode.
2221                  */
2222                 first_seg->pkt_len -= rxq->crc_len;
2223                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2224                         struct rte_mbuf *lp;
2225
2226                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2227                                 ;
2228
2229                         first_seg->nb_segs--;
2230                         lp->data_len -= rxq->crc_len - rxm->data_len;
2231                         lp->next = NULL;
2232                         rte_pktmbuf_free_seg(rxm);
2233                 } else
2234                         rxm->data_len -= rxq->crc_len;
2235
2236                 /* Prefetch data of first segment, if configured to do so. */
2237                 rte_packet_prefetch((char *)first_seg->buf_addr +
2238                         first_seg->data_off);
2239
2240                 /*
2241                  * Store the mbuf address into the next entry of the array
2242                  * of returned packets.
2243                  */
2244                 rx_pkts[nb_rx++] = first_seg;
2245         }
2246
2247         /*
2248          * Record index of the next RX descriptor to probe.
2249          */
2250         rxq->rx_tail = rx_id;
2251
2252         /*
2253          * If the number of free RX descriptors is greater than the RX free
2254          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2255          * register.
2256          * Update the RDT with the value of the last processed RX descriptor
2257          * minus 1, to guarantee that the RDT register is never equal to the
2258          * RDH register, which creates a "full" ring situtation from the
2259          * hardware point of view...
2260          */
2261         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2262                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2263                            "nb_hold=%u nb_rx=%u",
2264                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2265
2266                 rte_wmb();
2267                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2268                 nb_hold = 0;
2269         }
2270
2271         rxq->nb_rx_hold = nb_hold;
2272         return nb_rx;
2273 }
2274
2275 uint16_t
2276 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2277                                  uint16_t nb_pkts)
2278 {
2279         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2280 }
2281
2282 uint16_t
2283 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2284                                uint16_t nb_pkts)
2285 {
2286         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2287 }
2288
2289 /*********************************************************************
2290  *
2291  *  Queue management functions
2292  *
2293  **********************************************************************/
2294
2295 static void __rte_cold
2296 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2297 {
2298         unsigned i;
2299
2300         if (txq->sw_ring != NULL) {
2301                 for (i = 0; i < txq->nb_tx_desc; i++) {
2302                         if (txq->sw_ring[i].mbuf != NULL) {
2303                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2304                                 txq->sw_ring[i].mbuf = NULL;
2305                         }
2306                 }
2307         }
2308 }
2309
2310 static int
2311 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2312 {
2313         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2314         uint16_t i, tx_last, tx_id;
2315         uint16_t nb_tx_free_last;
2316         uint16_t nb_tx_to_clean;
2317         uint32_t pkt_cnt;
2318
2319         /* Start free mbuf from the next of tx_tail */
2320         tx_last = txq->tx_tail;
2321         tx_id  = swr_ring[tx_last].next_id;
2322
2323         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2324                 return 0;
2325
2326         nb_tx_to_clean = txq->nb_tx_free;
2327         nb_tx_free_last = txq->nb_tx_free;
2328         if (!free_cnt)
2329                 free_cnt = txq->nb_tx_desc;
2330
2331         /* Loop through swr_ring to count the amount of
2332          * freeable mubfs and packets.
2333          */
2334         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2335                 for (i = 0; i < nb_tx_to_clean &&
2336                         pkt_cnt < free_cnt &&
2337                         tx_id != tx_last; i++) {
2338                         if (swr_ring[tx_id].mbuf != NULL) {
2339                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2340                                 swr_ring[tx_id].mbuf = NULL;
2341
2342                                 /*
2343                                  * last segment in the packet,
2344                                  * increment packet count
2345                                  */
2346                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2347                         }
2348
2349                         tx_id = swr_ring[tx_id].next_id;
2350                 }
2351
2352                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2353                         txq->nb_tx_free || tx_id == tx_last)
2354                         break;
2355
2356                 if (pkt_cnt < free_cnt) {
2357                         if (ixgbe_xmit_cleanup(txq))
2358                                 break;
2359
2360                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2361                         nb_tx_free_last = txq->nb_tx_free;
2362                 }
2363         }
2364
2365         return (int)pkt_cnt;
2366 }
2367
2368 static int
2369 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2370                         uint32_t free_cnt)
2371 {
2372         int i, n, cnt;
2373
2374         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2375                 free_cnt = txq->nb_tx_desc;
2376
2377         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2378
2379         for (i = 0; i < cnt; i += n) {
2380                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2381                         break;
2382
2383                 n = ixgbe_tx_free_bufs(txq);
2384
2385                 if (n == 0)
2386                         break;
2387         }
2388
2389         return i;
2390 }
2391
2392 static int
2393 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2394                         uint32_t free_cnt __rte_unused)
2395 {
2396         return -ENOTSUP;
2397 }
2398
2399 int
2400 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2401 {
2402         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2403         if (txq->offloads == 0 &&
2404 #ifdef RTE_LIBRTE_SECURITY
2405                         !(txq->using_ipsec) &&
2406 #endif
2407                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2408                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2409                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2410                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2411                                         txq->sw_ring_v != NULL)) {
2412                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2413                 } else {
2414                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2415                 }
2416         }
2417
2418         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2419 }
2420
2421 static void __rte_cold
2422 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2423 {
2424         if (txq != NULL &&
2425             txq->sw_ring != NULL)
2426                 rte_free(txq->sw_ring);
2427 }
2428
2429 static void __rte_cold
2430 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2431 {
2432         if (txq != NULL && txq->ops != NULL) {
2433                 txq->ops->release_mbufs(txq);
2434                 txq->ops->free_swring(txq);
2435                 rte_free(txq);
2436         }
2437 }
2438
2439 void __rte_cold
2440 ixgbe_dev_tx_queue_release(void *txq)
2441 {
2442         ixgbe_tx_queue_release(txq);
2443 }
2444
2445 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2446 static void __rte_cold
2447 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2448 {
2449         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2450         struct ixgbe_tx_entry *txe = txq->sw_ring;
2451         uint16_t prev, i;
2452
2453         /* Zero out HW ring memory */
2454         for (i = 0; i < txq->nb_tx_desc; i++) {
2455                 txq->tx_ring[i] = zeroed_desc;
2456         }
2457
2458         /* Initialize SW ring entries */
2459         prev = (uint16_t) (txq->nb_tx_desc - 1);
2460         for (i = 0; i < txq->nb_tx_desc; i++) {
2461                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2462
2463                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2464                 txe[i].mbuf = NULL;
2465                 txe[i].last_id = i;
2466                 txe[prev].next_id = i;
2467                 prev = i;
2468         }
2469
2470         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2471         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2472
2473         txq->tx_tail = 0;
2474         txq->nb_tx_used = 0;
2475         /*
2476          * Always allow 1 descriptor to be un-allocated to avoid
2477          * a H/W race condition
2478          */
2479         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2480         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2481         txq->ctx_curr = 0;
2482         memset((void *)&txq->ctx_cache, 0,
2483                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2484 }
2485
2486 static const struct ixgbe_txq_ops def_txq_ops = {
2487         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2488         .free_swring = ixgbe_tx_free_swring,
2489         .reset = ixgbe_reset_tx_queue,
2490 };
2491
2492 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2493  * the queue parameters. Used in tx_queue_setup by primary process and then
2494  * in dev_init by secondary process when attaching to an existing ethdev.
2495  */
2496 void __rte_cold
2497 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2498 {
2499         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2500         if ((txq->offloads == 0) &&
2501 #ifdef RTE_LIBRTE_SECURITY
2502                         !(txq->using_ipsec) &&
2503 #endif
2504                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2505                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2506                 dev->tx_pkt_prepare = NULL;
2507                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2508                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2509                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2510                                         ixgbe_txq_vec_setup(txq) == 0)) {
2511                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2512                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2513                 } else
2514                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2515         } else {
2516                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2517                 PMD_INIT_LOG(DEBUG,
2518                                 " - offloads = 0x%" PRIx64,
2519                                 txq->offloads);
2520                 PMD_INIT_LOG(DEBUG,
2521                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2522                                 (unsigned long)txq->tx_rs_thresh,
2523                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2524                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2525                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2526         }
2527 }
2528
2529 uint64_t
2530 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2531 {
2532         RTE_SET_USED(dev);
2533
2534         return 0;
2535 }
2536
2537 uint64_t
2538 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2539 {
2540         uint64_t tx_offload_capa;
2541         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2542
2543         tx_offload_capa =
2544                 DEV_TX_OFFLOAD_VLAN_INSERT |
2545                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2546                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2547                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2548                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2549                 DEV_TX_OFFLOAD_TCP_TSO     |
2550                 DEV_TX_OFFLOAD_MULTI_SEGS;
2551
2552         if (hw->mac.type == ixgbe_mac_82599EB ||
2553             hw->mac.type == ixgbe_mac_X540)
2554                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2555
2556         if (hw->mac.type == ixgbe_mac_X550 ||
2557             hw->mac.type == ixgbe_mac_X550EM_x ||
2558             hw->mac.type == ixgbe_mac_X550EM_a)
2559                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2560
2561 #ifdef RTE_LIBRTE_SECURITY
2562         if (dev->security_ctx)
2563                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2564 #endif
2565         return tx_offload_capa;
2566 }
2567
2568 int __rte_cold
2569 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2570                          uint16_t queue_idx,
2571                          uint16_t nb_desc,
2572                          unsigned int socket_id,
2573                          const struct rte_eth_txconf *tx_conf)
2574 {
2575         const struct rte_memzone *tz;
2576         struct ixgbe_tx_queue *txq;
2577         struct ixgbe_hw     *hw;
2578         uint16_t tx_rs_thresh, tx_free_thresh;
2579         uint64_t offloads;
2580
2581         PMD_INIT_FUNC_TRACE();
2582         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2583
2584         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2585
2586         /*
2587          * Validate number of transmit descriptors.
2588          * It must not exceed hardware maximum, and must be multiple
2589          * of IXGBE_ALIGN.
2590          */
2591         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2592                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2593                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2594                 return -EINVAL;
2595         }
2596
2597         /*
2598          * The following two parameters control the setting of the RS bit on
2599          * transmit descriptors.
2600          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2601          * descriptors have been used.
2602          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2603          * descriptors are used or if the number of descriptors required
2604          * to transmit a packet is greater than the number of free TX
2605          * descriptors.
2606          * The following constraints must be satisfied:
2607          *  tx_rs_thresh must be greater than 0.
2608          *  tx_rs_thresh must be less than the size of the ring minus 2.
2609          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2610          *  tx_rs_thresh must be a divisor of the ring size.
2611          *  tx_free_thresh must be greater than 0.
2612          *  tx_free_thresh must be less than the size of the ring minus 3.
2613          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2614          * One descriptor in the TX ring is used as a sentinel to avoid a
2615          * H/W race condition, hence the maximum threshold constraints.
2616          * When set to zero use default values.
2617          */
2618         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2619                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2620         /* force tx_rs_thresh to adapt an aggresive tx_free_thresh */
2621         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2622                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2623         if (tx_conf->tx_rs_thresh > 0)
2624                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2625         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2626                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2627                              "exceed nb_desc. (tx_rs_thresh=%u "
2628                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2629                              (unsigned int)tx_rs_thresh,
2630                              (unsigned int)tx_free_thresh,
2631                              (unsigned int)nb_desc,
2632                              (int)dev->data->port_id,
2633                              (int)queue_idx);
2634                 return -(EINVAL);
2635         }
2636         if (tx_rs_thresh >= (nb_desc - 2)) {
2637                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2638                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2639                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2640                         (int)dev->data->port_id, (int)queue_idx);
2641                 return -(EINVAL);
2642         }
2643         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2644                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2645                         "(tx_rs_thresh=%u port=%d queue=%d)",
2646                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2647                         (int)dev->data->port_id, (int)queue_idx);
2648                 return -(EINVAL);
2649         }
2650         if (tx_free_thresh >= (nb_desc - 3)) {
2651                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2652                              "tx_free_thresh must be less than the number of "
2653                              "TX descriptors minus 3. (tx_free_thresh=%u "
2654                              "port=%d queue=%d)",
2655                              (unsigned int)tx_free_thresh,
2656                              (int)dev->data->port_id, (int)queue_idx);
2657                 return -(EINVAL);
2658         }
2659         if (tx_rs_thresh > tx_free_thresh) {
2660                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2661                              "tx_free_thresh. (tx_free_thresh=%u "
2662                              "tx_rs_thresh=%u port=%d queue=%d)",
2663                              (unsigned int)tx_free_thresh,
2664                              (unsigned int)tx_rs_thresh,
2665                              (int)dev->data->port_id,
2666                              (int)queue_idx);
2667                 return -(EINVAL);
2668         }
2669         if ((nb_desc % tx_rs_thresh) != 0) {
2670                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2671                              "number of TX descriptors. (tx_rs_thresh=%u "
2672                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2673                              (int)dev->data->port_id, (int)queue_idx);
2674                 return -(EINVAL);
2675         }
2676
2677         /*
2678          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2679          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2680          * by the NIC and all descriptors are written back after the NIC
2681          * accumulates WTHRESH descriptors.
2682          */
2683         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2684                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2685                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2686                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2687                              (int)dev->data->port_id, (int)queue_idx);
2688                 return -(EINVAL);
2689         }
2690
2691         /* Free memory prior to re-allocation if needed... */
2692         if (dev->data->tx_queues[queue_idx] != NULL) {
2693                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2694                 dev->data->tx_queues[queue_idx] = NULL;
2695         }
2696
2697         /* First allocate the tx queue data structure */
2698         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2699                                  RTE_CACHE_LINE_SIZE, socket_id);
2700         if (txq == NULL)
2701                 return -ENOMEM;
2702
2703         /*
2704          * Allocate TX ring hardware descriptors. A memzone large enough to
2705          * handle the maximum ring size is allocated in order to allow for
2706          * resizing in later calls to the queue setup function.
2707          */
2708         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2709                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2710                         IXGBE_ALIGN, socket_id);
2711         if (tz == NULL) {
2712                 ixgbe_tx_queue_release(txq);
2713                 return -ENOMEM;
2714         }
2715
2716         txq->nb_tx_desc = nb_desc;
2717         txq->tx_rs_thresh = tx_rs_thresh;
2718         txq->tx_free_thresh = tx_free_thresh;
2719         txq->pthresh = tx_conf->tx_thresh.pthresh;
2720         txq->hthresh = tx_conf->tx_thresh.hthresh;
2721         txq->wthresh = tx_conf->tx_thresh.wthresh;
2722         txq->queue_id = queue_idx;
2723         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2724                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2725         txq->port_id = dev->data->port_id;
2726         txq->offloads = offloads;
2727         txq->ops = &def_txq_ops;
2728         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2729 #ifdef RTE_LIBRTE_SECURITY
2730         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2731                         DEV_TX_OFFLOAD_SECURITY);
2732 #endif
2733
2734         /*
2735          * Modification to set VFTDT for virtual function if vf is detected
2736          */
2737         if (hw->mac.type == ixgbe_mac_82599_vf ||
2738             hw->mac.type == ixgbe_mac_X540_vf ||
2739             hw->mac.type == ixgbe_mac_X550_vf ||
2740             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2741             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2742                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2743         else
2744                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2745
2746         txq->tx_ring_phys_addr = tz->iova;
2747         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2748
2749         /* Allocate software ring */
2750         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2751                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2752                                 RTE_CACHE_LINE_SIZE, socket_id);
2753         if (txq->sw_ring == NULL) {
2754                 ixgbe_tx_queue_release(txq);
2755                 return -ENOMEM;
2756         }
2757         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2758                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2759
2760         /* set up vector or scalar TX function as appropriate */
2761         ixgbe_set_tx_function(dev, txq);
2762
2763         txq->ops->reset(txq);
2764
2765         dev->data->tx_queues[queue_idx] = txq;
2766
2767
2768         return 0;
2769 }
2770
2771 /**
2772  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2773  *
2774  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2775  * in the sw_rsc_ring is not set to NULL but rather points to the next
2776  * mbuf of this RSC aggregation (that has not been completed yet and still
2777  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2778  * will just free first "nb_segs" segments of the cluster explicitly by calling
2779  * an rte_pktmbuf_free_seg().
2780  *
2781  * @m scattered cluster head
2782  */
2783 static void __rte_cold
2784 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2785 {
2786         uint16_t i, nb_segs = m->nb_segs;
2787         struct rte_mbuf *next_seg;
2788
2789         for (i = 0; i < nb_segs; i++) {
2790                 next_seg = m->next;
2791                 rte_pktmbuf_free_seg(m);
2792                 m = next_seg;
2793         }
2794 }
2795
2796 static void __rte_cold
2797 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2798 {
2799         unsigned i;
2800
2801         /* SSE Vector driver has a different way of releasing mbufs. */
2802         if (rxq->rx_using_sse) {
2803                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2804                 return;
2805         }
2806
2807         if (rxq->sw_ring != NULL) {
2808                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2809                         if (rxq->sw_ring[i].mbuf != NULL) {
2810                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2811                                 rxq->sw_ring[i].mbuf = NULL;
2812                         }
2813                 }
2814                 if (rxq->rx_nb_avail) {
2815                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2816                                 struct rte_mbuf *mb;
2817
2818                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2819                                 rte_pktmbuf_free_seg(mb);
2820                         }
2821                         rxq->rx_nb_avail = 0;
2822                 }
2823         }
2824
2825         if (rxq->sw_sc_ring)
2826                 for (i = 0; i < rxq->nb_rx_desc; i++)
2827                         if (rxq->sw_sc_ring[i].fbuf) {
2828                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2829                                 rxq->sw_sc_ring[i].fbuf = NULL;
2830                         }
2831 }
2832
2833 static void __rte_cold
2834 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2835 {
2836         if (rxq != NULL) {
2837                 ixgbe_rx_queue_release_mbufs(rxq);
2838                 rte_free(rxq->sw_ring);
2839                 rte_free(rxq->sw_sc_ring);
2840                 rte_free(rxq);
2841         }
2842 }
2843
2844 void __rte_cold
2845 ixgbe_dev_rx_queue_release(void *rxq)
2846 {
2847         ixgbe_rx_queue_release(rxq);
2848 }
2849
2850 /*
2851  * Check if Rx Burst Bulk Alloc function can be used.
2852  * Return
2853  *        0: the preconditions are satisfied and the bulk allocation function
2854  *           can be used.
2855  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2856  *           function must be used.
2857  */
2858 static inline int __rte_cold
2859 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2860 {
2861         int ret = 0;
2862
2863         /*
2864          * Make sure the following pre-conditions are satisfied:
2865          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2866          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2867          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2868          * Scattered packets are not supported.  This should be checked
2869          * outside of this function.
2870          */
2871         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2872                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2873                              "rxq->rx_free_thresh=%d, "
2874                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2875                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2876                 ret = -EINVAL;
2877         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2878                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2879                              "rxq->rx_free_thresh=%d, "
2880                              "rxq->nb_rx_desc=%d",
2881                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2882                 ret = -EINVAL;
2883         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2884                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2885                              "rxq->nb_rx_desc=%d, "
2886                              "rxq->rx_free_thresh=%d",
2887                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2888                 ret = -EINVAL;
2889         }
2890
2891         return ret;
2892 }
2893
2894 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2895 static void __rte_cold
2896 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2897 {
2898         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2899         unsigned i;
2900         uint16_t len = rxq->nb_rx_desc;
2901
2902         /*
2903          * By default, the Rx queue setup function allocates enough memory for
2904          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2905          * extra memory at the end of the descriptor ring to be zero'd out.
2906          */
2907         if (adapter->rx_bulk_alloc_allowed)
2908                 /* zero out extra memory */
2909                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2910
2911         /*
2912          * Zero out HW ring memory. Zero out extra memory at the end of
2913          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2914          * reads extra memory as zeros.
2915          */
2916         for (i = 0; i < len; i++) {
2917                 rxq->rx_ring[i] = zeroed_desc;
2918         }
2919
2920         /*
2921          * initialize extra software ring entries. Space for these extra
2922          * entries is always allocated
2923          */
2924         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2925         for (i = rxq->nb_rx_desc; i < len; ++i) {
2926                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2927         }
2928
2929         rxq->rx_nb_avail = 0;
2930         rxq->rx_next_avail = 0;
2931         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2932         rxq->rx_tail = 0;
2933         rxq->nb_rx_hold = 0;
2934         rxq->pkt_first_seg = NULL;
2935         rxq->pkt_last_seg = NULL;
2936
2937 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2938         rxq->rxrearm_start = 0;
2939         rxq->rxrearm_nb = 0;
2940 #endif
2941 }
2942
2943 static int
2944 ixgbe_is_vf(struct rte_eth_dev *dev)
2945 {
2946         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2947
2948         switch (hw->mac.type) {
2949         case ixgbe_mac_82599_vf:
2950         case ixgbe_mac_X540_vf:
2951         case ixgbe_mac_X550_vf:
2952         case ixgbe_mac_X550EM_x_vf:
2953         case ixgbe_mac_X550EM_a_vf:
2954                 return 1;
2955         default:
2956                 return 0;
2957         }
2958 }
2959
2960 uint64_t
2961 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2962 {
2963         uint64_t offloads = 0;
2964         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2965
2966         if (hw->mac.type != ixgbe_mac_82598EB)
2967                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2968
2969         return offloads;
2970 }
2971
2972 uint64_t
2973 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2974 {
2975         uint64_t offloads;
2976         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2977
2978         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2979                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2980                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2981                    DEV_RX_OFFLOAD_KEEP_CRC    |
2982                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2983                    DEV_RX_OFFLOAD_VLAN_FILTER |
2984                    DEV_RX_OFFLOAD_SCATTER |
2985                    DEV_RX_OFFLOAD_RSS_HASH;
2986
2987         if (hw->mac.type == ixgbe_mac_82598EB)
2988                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2989
2990         if (ixgbe_is_vf(dev) == 0)
2991                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2992
2993         /*
2994          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2995          * mode.
2996          */
2997         if ((hw->mac.type == ixgbe_mac_82599EB ||
2998              hw->mac.type == ixgbe_mac_X540 ||
2999              hw->mac.type == ixgbe_mac_X550) &&
3000             !RTE_ETH_DEV_SRIOV(dev).active)
3001                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3002
3003         if (hw->mac.type == ixgbe_mac_82599EB ||
3004             hw->mac.type == ixgbe_mac_X540)
3005                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3006
3007         if (hw->mac.type == ixgbe_mac_X550 ||
3008             hw->mac.type == ixgbe_mac_X550EM_x ||
3009             hw->mac.type == ixgbe_mac_X550EM_a)
3010                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3011
3012 #ifdef RTE_LIBRTE_SECURITY
3013         if (dev->security_ctx)
3014                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3015 #endif
3016
3017         return offloads;
3018 }
3019
3020 int __rte_cold
3021 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3022                          uint16_t queue_idx,
3023                          uint16_t nb_desc,
3024                          unsigned int socket_id,
3025                          const struct rte_eth_rxconf *rx_conf,
3026                          struct rte_mempool *mp)
3027 {
3028         const struct rte_memzone *rz;
3029         struct ixgbe_rx_queue *rxq;
3030         struct ixgbe_hw     *hw;
3031         uint16_t len;
3032         struct ixgbe_adapter *adapter = dev->data->dev_private;
3033         uint64_t offloads;
3034
3035         PMD_INIT_FUNC_TRACE();
3036         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3037
3038         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3039
3040         /*
3041          * Validate number of receive descriptors.
3042          * It must not exceed hardware maximum, and must be multiple
3043          * of IXGBE_ALIGN.
3044          */
3045         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3046                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3047                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3048                 return -EINVAL;
3049         }
3050
3051         /* Free memory prior to re-allocation if needed... */
3052         if (dev->data->rx_queues[queue_idx] != NULL) {
3053                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3054                 dev->data->rx_queues[queue_idx] = NULL;
3055         }
3056
3057         /* First allocate the rx queue data structure */
3058         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3059                                  RTE_CACHE_LINE_SIZE, socket_id);
3060         if (rxq == NULL)
3061                 return -ENOMEM;
3062         rxq->mb_pool = mp;
3063         rxq->nb_rx_desc = nb_desc;
3064         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3065         rxq->queue_id = queue_idx;
3066         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3067                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3068         rxq->port_id = dev->data->port_id;
3069         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3070                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3071         else
3072                 rxq->crc_len = 0;
3073         rxq->drop_en = rx_conf->rx_drop_en;
3074         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3075         rxq->offloads = offloads;
3076
3077         /*
3078          * The packet type in RX descriptor is different for different NICs.
3079          * Some bits are used for x550 but reserved for other NICS.
3080          * So set different masks for different NICs.
3081          */
3082         if (hw->mac.type == ixgbe_mac_X550 ||
3083             hw->mac.type == ixgbe_mac_X550EM_x ||
3084             hw->mac.type == ixgbe_mac_X550EM_a ||
3085             hw->mac.type == ixgbe_mac_X550_vf ||
3086             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3087             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3088                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3089         else
3090                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3091
3092         /*
3093          * Allocate RX ring hardware descriptors. A memzone large enough to
3094          * handle the maximum ring size is allocated in order to allow for
3095          * resizing in later calls to the queue setup function.
3096          */
3097         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3098                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3099         if (rz == NULL) {
3100                 ixgbe_rx_queue_release(rxq);
3101                 return -ENOMEM;
3102         }
3103
3104         /*
3105          * Zero init all the descriptors in the ring.
3106          */
3107         memset(rz->addr, 0, RX_RING_SZ);
3108
3109         /*
3110          * Modified to setup VFRDT for Virtual Function
3111          */
3112         if (hw->mac.type == ixgbe_mac_82599_vf ||
3113             hw->mac.type == ixgbe_mac_X540_vf ||
3114             hw->mac.type == ixgbe_mac_X550_vf ||
3115             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3116             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3117                 rxq->rdt_reg_addr =
3118                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3119                 rxq->rdh_reg_addr =
3120                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3121         } else {
3122                 rxq->rdt_reg_addr =
3123                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3124                 rxq->rdh_reg_addr =
3125                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3126         }
3127
3128         rxq->rx_ring_phys_addr = rz->iova;
3129         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3130
3131         /*
3132          * Certain constraints must be met in order to use the bulk buffer
3133          * allocation Rx burst function. If any of Rx queues doesn't meet them
3134          * the feature should be disabled for the whole port.
3135          */
3136         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3137                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3138                                     "preconditions - canceling the feature for "
3139                                     "the whole port[%d]",
3140                              rxq->queue_id, rxq->port_id);
3141                 adapter->rx_bulk_alloc_allowed = false;
3142         }
3143
3144         /*
3145          * Allocate software ring. Allow for space at the end of the
3146          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3147          * function does not access an invalid memory region.
3148          */
3149         len = nb_desc;
3150         if (adapter->rx_bulk_alloc_allowed)
3151                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3152
3153         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3154                                           sizeof(struct ixgbe_rx_entry) * len,
3155                                           RTE_CACHE_LINE_SIZE, socket_id);
3156         if (!rxq->sw_ring) {
3157                 ixgbe_rx_queue_release(rxq);
3158                 return -ENOMEM;
3159         }
3160
3161         /*
3162          * Always allocate even if it's not going to be needed in order to
3163          * simplify the code.
3164          *
3165          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3166          * be requested in ixgbe_dev_rx_init(), which is called later from
3167          * dev_start() flow.
3168          */
3169         rxq->sw_sc_ring =
3170                 rte_zmalloc_socket("rxq->sw_sc_ring",
3171                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3172                                    RTE_CACHE_LINE_SIZE, socket_id);
3173         if (!rxq->sw_sc_ring) {
3174                 ixgbe_rx_queue_release(rxq);
3175                 return -ENOMEM;
3176         }
3177
3178         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3179                             "dma_addr=0x%"PRIx64,
3180                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3181                      rxq->rx_ring_phys_addr);
3182
3183         if (!rte_is_power_of_2(nb_desc)) {
3184                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3185                                     "preconditions - canceling the feature for "
3186                                     "the whole port[%d]",
3187                              rxq->queue_id, rxq->port_id);
3188                 adapter->rx_vec_allowed = false;
3189         } else
3190                 ixgbe_rxq_vec_setup(rxq);
3191
3192         dev->data->rx_queues[queue_idx] = rxq;
3193
3194         ixgbe_reset_rx_queue(adapter, rxq);
3195
3196         return 0;
3197 }
3198
3199 uint32_t
3200 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3201 {
3202 #define IXGBE_RXQ_SCAN_INTERVAL 4
3203         volatile union ixgbe_adv_rx_desc *rxdp;
3204         struct ixgbe_rx_queue *rxq;
3205         uint32_t desc = 0;
3206
3207         rxq = dev->data->rx_queues[rx_queue_id];
3208         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3209
3210         while ((desc < rxq->nb_rx_desc) &&
3211                 (rxdp->wb.upper.status_error &
3212                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3213                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3214                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3215                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3216                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3217                                 desc - rxq->nb_rx_desc]);
3218         }
3219
3220         return desc;
3221 }
3222
3223 int
3224 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3225 {
3226         volatile union ixgbe_adv_rx_desc *rxdp;
3227         struct ixgbe_rx_queue *rxq = rx_queue;
3228         uint32_t desc;
3229
3230         if (unlikely(offset >= rxq->nb_rx_desc))
3231                 return 0;
3232         desc = rxq->rx_tail + offset;
3233         if (desc >= rxq->nb_rx_desc)
3234                 desc -= rxq->nb_rx_desc;
3235
3236         rxdp = &rxq->rx_ring[desc];
3237         return !!(rxdp->wb.upper.status_error &
3238                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3239 }
3240
3241 int
3242 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3243 {
3244         struct ixgbe_rx_queue *rxq = rx_queue;
3245         volatile uint32_t *status;
3246         uint32_t nb_hold, desc;
3247
3248         if (unlikely(offset >= rxq->nb_rx_desc))
3249                 return -EINVAL;
3250
3251 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3252         if (rxq->rx_using_sse)
3253                 nb_hold = rxq->rxrearm_nb;
3254         else
3255 #endif
3256                 nb_hold = rxq->nb_rx_hold;
3257         if (offset >= rxq->nb_rx_desc - nb_hold)
3258                 return RTE_ETH_RX_DESC_UNAVAIL;
3259
3260         desc = rxq->rx_tail + offset;
3261         if (desc >= rxq->nb_rx_desc)
3262                 desc -= rxq->nb_rx_desc;
3263
3264         status = &rxq->rx_ring[desc].wb.upper.status_error;
3265         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3266                 return RTE_ETH_RX_DESC_DONE;
3267
3268         return RTE_ETH_RX_DESC_AVAIL;
3269 }
3270
3271 int
3272 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3273 {
3274         struct ixgbe_tx_queue *txq = tx_queue;
3275         volatile uint32_t *status;
3276         uint32_t desc;
3277
3278         if (unlikely(offset >= txq->nb_tx_desc))
3279                 return -EINVAL;
3280
3281         desc = txq->tx_tail + offset;
3282         /* go to next desc that has the RS bit */
3283         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3284                 txq->tx_rs_thresh;
3285         if (desc >= txq->nb_tx_desc) {
3286                 desc -= txq->nb_tx_desc;
3287                 if (desc >= txq->nb_tx_desc)
3288                         desc -= txq->nb_tx_desc;
3289         }
3290
3291         status = &txq->tx_ring[desc].wb.status;
3292         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3293                 return RTE_ETH_TX_DESC_DONE;
3294
3295         return RTE_ETH_TX_DESC_FULL;
3296 }
3297
3298 /*
3299  * Set up link loopback for X540/X550 mode Tx->Rx.
3300  */
3301 static inline void __rte_cold
3302 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3303 {
3304         uint32_t macc;
3305         PMD_INIT_FUNC_TRACE();
3306
3307         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3308
3309         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3310                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3311         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3312
3313         if (enable) {
3314                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3315                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3316                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3317                 macc |= IXGBE_MACC_FLU;
3318         } else {
3319                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3320                 macc &= ~IXGBE_MACC_FLU;
3321         }
3322
3323         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3324                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3325
3326         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3327 }
3328
3329 void __rte_cold
3330 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3331 {
3332         unsigned i;
3333         struct ixgbe_adapter *adapter = dev->data->dev_private;
3334         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3335
3336         PMD_INIT_FUNC_TRACE();
3337
3338         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3339                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3340
3341                 if (txq != NULL) {
3342                         txq->ops->release_mbufs(txq);
3343                         txq->ops->reset(txq);
3344                 }
3345         }
3346
3347         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3348                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3349
3350                 if (rxq != NULL) {
3351                         ixgbe_rx_queue_release_mbufs(rxq);
3352                         ixgbe_reset_rx_queue(adapter, rxq);
3353                 }
3354         }
3355         /* If loopback mode was enabled, reconfigure the link accordingly */
3356         if (dev->data->dev_conf.lpbk_mode != 0) {
3357                 if (hw->mac.type == ixgbe_mac_X540 ||
3358                      hw->mac.type == ixgbe_mac_X550 ||
3359                      hw->mac.type == ixgbe_mac_X550EM_x ||
3360                      hw->mac.type == ixgbe_mac_X550EM_a)
3361                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3362         }
3363 }
3364
3365 void
3366 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3367 {
3368         unsigned i;
3369
3370         PMD_INIT_FUNC_TRACE();
3371
3372         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3373                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3374                 dev->data->rx_queues[i] = NULL;
3375                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3376         }
3377         dev->data->nb_rx_queues = 0;
3378
3379         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3380                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3381                 dev->data->tx_queues[i] = NULL;
3382                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3383         }
3384         dev->data->nb_tx_queues = 0;
3385 }
3386
3387 /*********************************************************************
3388  *
3389  *  Device RX/TX init functions
3390  *
3391  **********************************************************************/
3392
3393 /**
3394  * Receive Side Scaling (RSS)
3395  * See section 7.1.2.8 in the following document:
3396  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3397  *
3398  * Principles:
3399  * The source and destination IP addresses of the IP header and the source
3400  * and destination ports of TCP/UDP headers, if any, of received packets are
3401  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3402  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3403  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3404  * RSS output index which is used as the RX queue index where to store the
3405  * received packets.
3406  * The following output is supplied in the RX write-back descriptor:
3407  *     - 32-bit result of the Microsoft RSS hash function,
3408  *     - 4-bit RSS type field.
3409  */
3410
3411 /*
3412  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3413  * Used as the default key.
3414  */
3415 static uint8_t rss_intel_key[40] = {
3416         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3417         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3418         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3419         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3420         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3421 };
3422
3423 static void
3424 ixgbe_rss_disable(struct rte_eth_dev *dev)
3425 {
3426         struct ixgbe_hw *hw;
3427         uint32_t mrqc;
3428         uint32_t mrqc_reg;
3429
3430         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3431         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3432         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3433         mrqc &= ~IXGBE_MRQC_RSSEN;
3434         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3435 }
3436
3437 static void
3438 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3439 {
3440         uint8_t  *hash_key;
3441         uint32_t mrqc;
3442         uint32_t rss_key;
3443         uint64_t rss_hf;
3444         uint16_t i;
3445         uint32_t mrqc_reg;
3446         uint32_t rssrk_reg;
3447
3448         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3449         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3450
3451         hash_key = rss_conf->rss_key;
3452         if (hash_key != NULL) {
3453                 /* Fill in RSS hash key */
3454                 for (i = 0; i < 10; i++) {
3455                         rss_key  = hash_key[(i * 4)];
3456                         rss_key |= hash_key[(i * 4) + 1] << 8;
3457                         rss_key |= hash_key[(i * 4) + 2] << 16;
3458                         rss_key |= hash_key[(i * 4) + 3] << 24;
3459                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3460                 }
3461         }
3462
3463         /* Set configured hashing protocols in MRQC register */
3464         rss_hf = rss_conf->rss_hf;
3465         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3466         if (rss_hf & ETH_RSS_IPV4)
3467                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3468         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3469                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3470         if (rss_hf & ETH_RSS_IPV6)
3471                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3472         if (rss_hf & ETH_RSS_IPV6_EX)
3473                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3474         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3475                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3476         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3477                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3478         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3479                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3480         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3481                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3482         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3483                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3484         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3485 }
3486
3487 int
3488 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3489                           struct rte_eth_rss_conf *rss_conf)
3490 {
3491         struct ixgbe_hw *hw;
3492         uint32_t mrqc;
3493         uint64_t rss_hf;
3494         uint32_t mrqc_reg;
3495
3496         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3497
3498         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3499                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3500                         "NIC.");
3501                 return -ENOTSUP;
3502         }
3503         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3504
3505         /*
3506          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3507          *     "RSS enabling cannot be done dynamically while it must be
3508          *      preceded by a software reset"
3509          * Before changing anything, first check that the update RSS operation
3510          * does not attempt to disable RSS, if RSS was enabled at
3511          * initialization time, or does not attempt to enable RSS, if RSS was
3512          * disabled at initialization time.
3513          */
3514         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3515         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3516         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3517                 if (rss_hf != 0) /* Enable RSS */
3518                         return -(EINVAL);
3519                 return 0; /* Nothing to do */
3520         }
3521         /* RSS enabled */
3522         if (rss_hf == 0) /* Disable RSS */
3523                 return -(EINVAL);
3524         ixgbe_hw_rss_hash_set(hw, rss_conf);
3525         return 0;
3526 }
3527
3528 int
3529 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3530                             struct rte_eth_rss_conf *rss_conf)
3531 {
3532         struct ixgbe_hw *hw;
3533         uint8_t *hash_key;
3534         uint32_t mrqc;
3535         uint32_t rss_key;
3536         uint64_t rss_hf;
3537         uint16_t i;
3538         uint32_t mrqc_reg;
3539         uint32_t rssrk_reg;
3540
3541         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3542         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3543         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3544         hash_key = rss_conf->rss_key;
3545         if (hash_key != NULL) {
3546                 /* Return RSS hash key */
3547                 for (i = 0; i < 10; i++) {
3548                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3549                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3550                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3551                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3552                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3553                 }
3554         }
3555
3556         /* Get RSS functions configured in MRQC register */
3557         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3558         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3559                 rss_conf->rss_hf = 0;
3560                 return 0;
3561         }
3562         rss_hf = 0;
3563         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3564                 rss_hf |= ETH_RSS_IPV4;
3565         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3566                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3567         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3568                 rss_hf |= ETH_RSS_IPV6;
3569         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3570                 rss_hf |= ETH_RSS_IPV6_EX;
3571         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3572                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3573         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3574                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3575         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3576                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3577         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3578                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3579         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3580                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3581         rss_conf->rss_hf = rss_hf;
3582         return 0;
3583 }
3584
3585 static void
3586 ixgbe_rss_configure(struct rte_eth_dev *dev)
3587 {
3588         struct rte_eth_rss_conf rss_conf;
3589         struct ixgbe_adapter *adapter;
3590         struct ixgbe_hw *hw;
3591         uint32_t reta;
3592         uint16_t i;
3593         uint16_t j;
3594         uint16_t sp_reta_size;
3595         uint32_t reta_reg;
3596
3597         PMD_INIT_FUNC_TRACE();
3598         adapter = dev->data->dev_private;
3599         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3600
3601         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3602
3603         /*
3604          * Fill in redirection table
3605          * The byte-swap is needed because NIC registers are in
3606          * little-endian order.
3607          */
3608         if (adapter->rss_reta_updated == 0) {
3609                 reta = 0;
3610                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3611                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3612
3613                         if (j == dev->data->nb_rx_queues)
3614                                 j = 0;
3615                         reta = (reta << 8) | j;
3616                         if ((i & 3) == 3)
3617                                 IXGBE_WRITE_REG(hw, reta_reg,
3618                                                 rte_bswap32(reta));
3619                 }
3620         }
3621
3622         /*
3623          * Configure the RSS key and the RSS protocols used to compute
3624          * the RSS hash of input packets.
3625          */
3626         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3627         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3628                 ixgbe_rss_disable(dev);
3629                 return;
3630         }
3631         if (rss_conf.rss_key == NULL)
3632                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3633         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3634 }
3635
3636 #define NUM_VFTA_REGISTERS 128
3637 #define NIC_RX_BUFFER_SIZE 0x200
3638 #define X550_RX_BUFFER_SIZE 0x180
3639
3640 static void
3641 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3642 {
3643         struct rte_eth_vmdq_dcb_conf *cfg;
3644         struct ixgbe_hw *hw;
3645         enum rte_eth_nb_pools num_pools;
3646         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3647         uint16_t pbsize;
3648         uint8_t nb_tcs; /* number of traffic classes */
3649         int i;
3650
3651         PMD_INIT_FUNC_TRACE();
3652         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3653         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3654         num_pools = cfg->nb_queue_pools;
3655         /* Check we have a valid number of pools */
3656         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3657                 ixgbe_rss_disable(dev);
3658                 return;
3659         }
3660         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3661         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3662
3663         /*
3664          * RXPBSIZE
3665          * split rx buffer up into sections, each for 1 traffic class
3666          */
3667         switch (hw->mac.type) {
3668         case ixgbe_mac_X550:
3669         case ixgbe_mac_X550EM_x:
3670         case ixgbe_mac_X550EM_a:
3671                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3672                 break;
3673         default:
3674                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3675                 break;
3676         }
3677         for (i = 0; i < nb_tcs; i++) {
3678                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3679
3680                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3681                 /* clear 10 bits. */
3682                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3683                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3684         }
3685         /* zero alloc all unused TCs */
3686         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3687                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3688
3689                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3690                 /* clear 10 bits. */
3691                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3692         }
3693
3694         /* MRQC: enable vmdq and dcb */
3695         mrqc = (num_pools == ETH_16_POOLS) ?
3696                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3697         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3698
3699         /* PFVTCTL: turn on virtualisation and set the default pool */
3700         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3701         if (cfg->enable_default_pool) {
3702                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3703         } else {
3704                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3705         }
3706
3707         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3708
3709         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3710         queue_mapping = 0;
3711         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3712                 /*
3713                  * mapping is done with 3 bits per priority,
3714                  * so shift by i*3 each time
3715                  */
3716                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3717
3718         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3719
3720         /* RTRPCS: DCB related */
3721         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3722
3723         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3724         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3725         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3726         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3727
3728         /* VFTA - enable all vlan filters */
3729         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3730                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3731         }
3732
3733         /* VFRE: pool enabling for receive - 16 or 32 */
3734         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3735                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3736
3737         /*
3738          * MPSAR - allow pools to read specific mac addresses
3739          * In this case, all pools should be able to read from mac addr 0
3740          */
3741         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3742         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3743
3744         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3745         for (i = 0; i < cfg->nb_pool_maps; i++) {
3746                 /* set vlan id in VF register and set the valid bit */
3747                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3748                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3749                 /*
3750                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3751                  * pools, we only need to use the first half of the register
3752                  * i.e. bits 0-31
3753                  */
3754                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3755         }
3756 }
3757
3758 /**
3759  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3760  * @dev: pointer to eth_dev structure
3761  * @dcb_config: pointer to ixgbe_dcb_config structure
3762  */
3763 static void
3764 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3765                        struct ixgbe_dcb_config *dcb_config)
3766 {
3767         uint32_t reg;
3768         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3769
3770         PMD_INIT_FUNC_TRACE();
3771         if (hw->mac.type != ixgbe_mac_82598EB) {
3772                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3773                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3774                 reg |= IXGBE_RTTDCS_ARBDIS;
3775                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3776
3777                 /* Enable DCB for Tx with 8 TCs */
3778                 if (dcb_config->num_tcs.pg_tcs == 8) {
3779                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3780                 } else {
3781                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3782                 }
3783                 if (dcb_config->vt_mode)
3784                         reg |= IXGBE_MTQC_VT_ENA;
3785                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3786
3787                 /* Enable the Tx desc arbiter */
3788                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3789                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3790                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3791
3792                 /* Enable Security TX Buffer IFG for DCB */
3793                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3794                 reg |= IXGBE_SECTX_DCB;
3795                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3796         }
3797 }
3798
3799 /**
3800  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3801  * @dev: pointer to rte_eth_dev structure
3802  * @dcb_config: pointer to ixgbe_dcb_config structure
3803  */
3804 static void
3805 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3806                         struct ixgbe_dcb_config *dcb_config)
3807 {
3808         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3809                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3810         struct ixgbe_hw *hw =
3811                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3812
3813         PMD_INIT_FUNC_TRACE();
3814         if (hw->mac.type != ixgbe_mac_82598EB)
3815                 /*PF VF Transmit Enable*/
3816                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3817                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3818
3819         /*Configure general DCB TX parameters*/
3820         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3821 }
3822
3823 static void
3824 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3825                         struct ixgbe_dcb_config *dcb_config)
3826 {
3827         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3828                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3829         struct ixgbe_dcb_tc_config *tc;
3830         uint8_t i, j;
3831
3832         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3833         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3834                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3835                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3836         } else {
3837                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3838                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3839         }
3840
3841         /* Initialize User Priority to Traffic Class mapping */
3842         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3843                 tc = &dcb_config->tc_config[j];
3844                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3845         }
3846
3847         /* User Priority to Traffic Class mapping */
3848         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3849                 j = vmdq_rx_conf->dcb_tc[i];
3850                 tc = &dcb_config->tc_config[j];
3851                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3852                                                 (uint8_t)(1 << i);
3853         }
3854 }
3855
3856 static void
3857 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3858                         struct ixgbe_dcb_config *dcb_config)
3859 {
3860         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3861                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3862         struct ixgbe_dcb_tc_config *tc;
3863         uint8_t i, j;
3864
3865         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3866         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3867                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3868                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3869         } else {
3870                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3871                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3872         }
3873
3874         /* Initialize User Priority to Traffic Class mapping */
3875         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3876                 tc = &dcb_config->tc_config[j];
3877                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3878         }
3879
3880         /* User Priority to Traffic Class mapping */
3881         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3882                 j = vmdq_tx_conf->dcb_tc[i];
3883                 tc = &dcb_config->tc_config[j];
3884                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3885                                                 (uint8_t)(1 << i);
3886         }
3887 }
3888
3889 static void
3890 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3891                 struct ixgbe_dcb_config *dcb_config)
3892 {
3893         struct rte_eth_dcb_rx_conf *rx_conf =
3894                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3895         struct ixgbe_dcb_tc_config *tc;
3896         uint8_t i, j;
3897
3898         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3899         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3900
3901         /* Initialize User Priority to Traffic Class mapping */
3902         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3903                 tc = &dcb_config->tc_config[j];
3904                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3905         }
3906
3907         /* User Priority to Traffic Class mapping */
3908         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3909                 j = rx_conf->dcb_tc[i];
3910                 tc = &dcb_config->tc_config[j];
3911                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3912                                                 (uint8_t)(1 << i);
3913         }
3914 }
3915
3916 static void
3917 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3918                 struct ixgbe_dcb_config *dcb_config)
3919 {
3920         struct rte_eth_dcb_tx_conf *tx_conf =
3921                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3922         struct ixgbe_dcb_tc_config *tc;
3923         uint8_t i, j;
3924
3925         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3926         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3927
3928         /* Initialize User Priority to Traffic Class mapping */
3929         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3930                 tc = &dcb_config->tc_config[j];
3931                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3932         }
3933
3934         /* User Priority to Traffic Class mapping */
3935         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3936                 j = tx_conf->dcb_tc[i];
3937                 tc = &dcb_config->tc_config[j];
3938                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3939                                                 (uint8_t)(1 << i);
3940         }
3941 }
3942
3943 /**
3944  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3945  * @dev: pointer to eth_dev structure
3946  * @dcb_config: pointer to ixgbe_dcb_config structure
3947  */
3948 static void
3949 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3950                        struct ixgbe_dcb_config *dcb_config)
3951 {
3952         uint32_t reg;
3953         uint32_t vlanctrl;
3954         uint8_t i;
3955         uint32_t q;
3956         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3957
3958         PMD_INIT_FUNC_TRACE();
3959         /*
3960          * Disable the arbiter before changing parameters
3961          * (always enable recycle mode; WSP)
3962          */
3963         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3964         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3965
3966         if (hw->mac.type != ixgbe_mac_82598EB) {
3967                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3968                 if (dcb_config->num_tcs.pg_tcs == 4) {
3969                         if (dcb_config->vt_mode)
3970                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3971                                         IXGBE_MRQC_VMDQRT4TCEN;
3972                         else {
3973                                 /* no matter the mode is DCB or DCB_RSS, just
3974                                  * set the MRQE to RSSXTCEN. RSS is controlled
3975                                  * by RSS_FIELD
3976                                  */
3977                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3978                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3979                                         IXGBE_MRQC_RTRSS4TCEN;
3980                         }
3981                 }
3982                 if (dcb_config->num_tcs.pg_tcs == 8) {
3983                         if (dcb_config->vt_mode)
3984                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3985                                         IXGBE_MRQC_VMDQRT8TCEN;
3986                         else {
3987                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3988                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3989                                         IXGBE_MRQC_RTRSS8TCEN;
3990                         }
3991                 }
3992
3993                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3994
3995                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3996                         /* Disable drop for all queues in VMDQ mode*/
3997                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3998                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3999                                                 (IXGBE_QDE_WRITE |
4000                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4001                 } else {
4002                         /* Enable drop for all queues in SRIOV mode */
4003                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4004                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4005                                                 (IXGBE_QDE_WRITE |
4006                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4007                                                  IXGBE_QDE_ENABLE));
4008                 }
4009         }
4010
4011         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4012         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4013         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4014         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4015
4016         /* VFTA - enable all vlan filters */
4017         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4018                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4019         }
4020
4021         /*
4022          * Configure Rx packet plane (recycle mode; WSP) and
4023          * enable arbiter
4024          */
4025         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4026         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4027 }
4028
4029 static void
4030 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4031                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4032 {
4033         switch (hw->mac.type) {
4034         case ixgbe_mac_82598EB:
4035                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4036                 break;
4037         case ixgbe_mac_82599EB:
4038         case ixgbe_mac_X540:
4039         case ixgbe_mac_X550:
4040         case ixgbe_mac_X550EM_x:
4041         case ixgbe_mac_X550EM_a:
4042                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4043                                                   tsa, map);
4044                 break;
4045         default:
4046                 break;
4047         }
4048 }
4049
4050 static void
4051 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4052                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4053 {
4054         switch (hw->mac.type) {
4055         case ixgbe_mac_82598EB:
4056                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4057                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4058                 break;
4059         case ixgbe_mac_82599EB:
4060         case ixgbe_mac_X540:
4061         case ixgbe_mac_X550:
4062         case ixgbe_mac_X550EM_x:
4063         case ixgbe_mac_X550EM_a:
4064                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4065                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4066                 break;
4067         default:
4068                 break;
4069         }
4070 }
4071
4072 #define DCB_RX_CONFIG  1
4073 #define DCB_TX_CONFIG  1
4074 #define DCB_TX_PB      1024
4075 /**
4076  * ixgbe_dcb_hw_configure - Enable DCB and configure
4077  * general DCB in VT mode and non-VT mode parameters
4078  * @dev: pointer to rte_eth_dev structure
4079  * @dcb_config: pointer to ixgbe_dcb_config structure
4080  */
4081 static int
4082 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4083                         struct ixgbe_dcb_config *dcb_config)
4084 {
4085         int     ret = 0;
4086         uint8_t i, pfc_en, nb_tcs;
4087         uint16_t pbsize, rx_buffer_size;
4088         uint8_t config_dcb_rx = 0;
4089         uint8_t config_dcb_tx = 0;
4090         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4091         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4092         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4093         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4094         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4095         struct ixgbe_dcb_tc_config *tc;
4096         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4097                 RTE_ETHER_CRC_LEN;
4098         struct ixgbe_hw *hw =
4099                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4100         struct ixgbe_bw_conf *bw_conf =
4101                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4102
4103         switch (dev->data->dev_conf.rxmode.mq_mode) {
4104         case ETH_MQ_RX_VMDQ_DCB:
4105                 dcb_config->vt_mode = true;
4106                 if (hw->mac.type != ixgbe_mac_82598EB) {
4107                         config_dcb_rx = DCB_RX_CONFIG;
4108                         /*
4109                          *get dcb and VT rx configuration parameters
4110                          *from rte_eth_conf
4111                          */
4112                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4113                         /*Configure general VMDQ and DCB RX parameters*/
4114                         ixgbe_vmdq_dcb_configure(dev);
4115                 }
4116                 break;
4117         case ETH_MQ_RX_DCB:
4118         case ETH_MQ_RX_DCB_RSS:
4119                 dcb_config->vt_mode = false;
4120                 config_dcb_rx = DCB_RX_CONFIG;
4121                 /* Get dcb TX configuration parameters from rte_eth_conf */
4122                 ixgbe_dcb_rx_config(dev, dcb_config);
4123                 /*Configure general DCB RX parameters*/
4124                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4125                 break;
4126         default:
4127                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4128                 break;
4129         }
4130         switch (dev->data->dev_conf.txmode.mq_mode) {
4131         case ETH_MQ_TX_VMDQ_DCB:
4132                 dcb_config->vt_mode = true;
4133                 config_dcb_tx = DCB_TX_CONFIG;
4134                 /* get DCB and VT TX configuration parameters
4135                  * from rte_eth_conf
4136                  */
4137                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4138                 /*Configure general VMDQ and DCB TX parameters*/
4139                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4140                 break;
4141
4142         case ETH_MQ_TX_DCB:
4143                 dcb_config->vt_mode = false;
4144                 config_dcb_tx = DCB_TX_CONFIG;
4145                 /*get DCB TX configuration parameters from rte_eth_conf*/
4146                 ixgbe_dcb_tx_config(dev, dcb_config);
4147                 /*Configure general DCB TX parameters*/
4148                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4149                 break;
4150         default:
4151                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4152                 break;
4153         }
4154
4155         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4156         /* Unpack map */
4157         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4158         if (nb_tcs == ETH_4_TCS) {
4159                 /* Avoid un-configured priority mapping to TC0 */
4160                 uint8_t j = 4;
4161                 uint8_t mask = 0xFF;
4162
4163                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4164                         mask = (uint8_t)(mask & (~(1 << map[i])));
4165                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4166                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4167                                 map[j++] = i;
4168                         mask >>= 1;
4169                 }
4170                 /* Re-configure 4 TCs BW */
4171                 for (i = 0; i < nb_tcs; i++) {
4172                         tc = &dcb_config->tc_config[i];
4173                         if (bw_conf->tc_num != nb_tcs)
4174                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4175                                         (uint8_t)(100 / nb_tcs);
4176                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4177                                                 (uint8_t)(100 / nb_tcs);
4178                 }
4179                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4180                         tc = &dcb_config->tc_config[i];
4181                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4182                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4183                 }
4184         } else {
4185                 /* Re-configure 8 TCs BW */
4186                 for (i = 0; i < nb_tcs; i++) {
4187                         tc = &dcb_config->tc_config[i];
4188                         if (bw_conf->tc_num != nb_tcs)
4189                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4190                                         (uint8_t)(100 / nb_tcs + (i & 1));
4191                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4192                                 (uint8_t)(100 / nb_tcs + (i & 1));
4193                 }
4194         }
4195
4196         switch (hw->mac.type) {
4197         case ixgbe_mac_X550:
4198         case ixgbe_mac_X550EM_x:
4199         case ixgbe_mac_X550EM_a:
4200                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4201                 break;
4202         default:
4203                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4204                 break;
4205         }
4206
4207         if (config_dcb_rx) {
4208                 /* Set RX buffer size */
4209                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4210                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4211
4212                 for (i = 0; i < nb_tcs; i++) {
4213                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4214                 }
4215                 /* zero alloc all unused TCs */
4216                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4217                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4218                 }
4219         }
4220         if (config_dcb_tx) {
4221                 /* Only support an equally distributed
4222                  *  Tx packet buffer strategy.
4223                  */
4224                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4225                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4226
4227                 for (i = 0; i < nb_tcs; i++) {
4228                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4229                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4230                 }
4231                 /* Clear unused TCs, if any, to zero buffer size*/
4232                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4233                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4234                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4235                 }
4236         }
4237
4238         /*Calculates traffic class credits*/
4239         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4240                                 IXGBE_DCB_TX_CONFIG);
4241         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4242                                 IXGBE_DCB_RX_CONFIG);
4243
4244         if (config_dcb_rx) {
4245                 /* Unpack CEE standard containers */
4246                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4247                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4248                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4249                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4250                 /* Configure PG(ETS) RX */
4251                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4252         }
4253
4254         if (config_dcb_tx) {
4255                 /* Unpack CEE standard containers */
4256                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4257                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4258                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4259                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4260                 /* Configure PG(ETS) TX */
4261                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4262         }
4263
4264         /*Configure queue statistics registers*/
4265         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4266
4267         /* Check if the PFC is supported */
4268         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4269                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4270                 for (i = 0; i < nb_tcs; i++) {
4271                         /*
4272                         * If the TC count is 8,and the default high_water is 48,
4273                         * the low_water is 16 as default.
4274                         */
4275                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4276                         hw->fc.low_water[i] = pbsize / 4;
4277                         /* Enable pfc for this TC */
4278                         tc = &dcb_config->tc_config[i];
4279                         tc->pfc = ixgbe_dcb_pfc_enabled;
4280                 }
4281                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4282                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4283                         pfc_en &= 0x0F;
4284                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4285         }
4286
4287         return ret;
4288 }
4289
4290 /**
4291  * ixgbe_configure_dcb - Configure DCB  Hardware
4292  * @dev: pointer to rte_eth_dev
4293  */
4294 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4295 {
4296         struct ixgbe_dcb_config *dcb_cfg =
4297                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4298         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4299
4300         PMD_INIT_FUNC_TRACE();
4301
4302         /* check support mq_mode for DCB */
4303         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4304             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4305             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4306                 return;
4307
4308         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4309                 return;
4310
4311         /** Configure DCB hardware **/
4312         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4313 }
4314
4315 /*
4316  * VMDq only support for 10 GbE NIC.
4317  */
4318 static void
4319 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4320 {
4321         struct rte_eth_vmdq_rx_conf *cfg;
4322         struct ixgbe_hw *hw;
4323         enum rte_eth_nb_pools num_pools;
4324         uint32_t mrqc, vt_ctl, vlanctrl;
4325         uint32_t vmolr = 0;
4326         int i;
4327
4328         PMD_INIT_FUNC_TRACE();
4329         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4330         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4331         num_pools = cfg->nb_queue_pools;
4332
4333         ixgbe_rss_disable(dev);
4334
4335         /* MRQC: enable vmdq */
4336         mrqc = IXGBE_MRQC_VMDQEN;
4337         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4338
4339         /* PFVTCTL: turn on virtualisation and set the default pool */
4340         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4341         if (cfg->enable_default_pool)
4342                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4343         else
4344                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4345
4346         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4347
4348         for (i = 0; i < (int)num_pools; i++) {
4349                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4350                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4351         }
4352
4353         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4354         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4355         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4356         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4357
4358         /* VFTA - enable all vlan filters */
4359         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4360                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4361
4362         /* VFRE: pool enabling for receive - 64 */
4363         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4364         if (num_pools == ETH_64_POOLS)
4365                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4366
4367         /*
4368          * MPSAR - allow pools to read specific mac addresses
4369          * In this case, all pools should be able to read from mac addr 0
4370          */
4371         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4372         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4373
4374         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4375         for (i = 0; i < cfg->nb_pool_maps; i++) {
4376                 /* set vlan id in VF register and set the valid bit */
4377                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4378                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4379                 /*
4380                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4381                  * pools, we only need to use the first half of the register
4382                  * i.e. bits 0-31
4383                  */
4384                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4385                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4386                                         (cfg->pool_map[i].pools & UINT32_MAX));
4387                 else
4388                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4389                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4390
4391         }
4392
4393         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4394         if (cfg->enable_loop_back) {
4395                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4396                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4397                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4398         }
4399
4400         IXGBE_WRITE_FLUSH(hw);
4401 }
4402
4403 /*
4404  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4405  * @hw: pointer to hardware structure
4406  */
4407 static void
4408 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4409 {
4410         uint32_t reg;
4411         uint32_t q;
4412
4413         PMD_INIT_FUNC_TRACE();
4414         /*PF VF Transmit Enable*/
4415         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4416         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4417
4418         /* Disable the Tx desc arbiter so that MTQC can be changed */
4419         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4420         reg |= IXGBE_RTTDCS_ARBDIS;
4421         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4422
4423         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4424         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4425
4426         /* Disable drop for all queues */
4427         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4428                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4429                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4430
4431         /* Enable the Tx desc arbiter */
4432         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4433         reg &= ~IXGBE_RTTDCS_ARBDIS;
4434         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4435
4436         IXGBE_WRITE_FLUSH(hw);
4437 }
4438
4439 static int __rte_cold
4440 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4441 {
4442         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4443         uint64_t dma_addr;
4444         unsigned int i;
4445
4446         /* Initialize software ring entries */
4447         for (i = 0; i < rxq->nb_rx_desc; i++) {
4448                 volatile union ixgbe_adv_rx_desc *rxd;
4449                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4450
4451                 if (mbuf == NULL) {
4452                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4453                                      (unsigned) rxq->queue_id);
4454                         return -ENOMEM;
4455                 }
4456
4457                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4458                 mbuf->port = rxq->port_id;
4459
4460                 dma_addr =
4461                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4462                 rxd = &rxq->rx_ring[i];
4463                 rxd->read.hdr_addr = 0;
4464                 rxd->read.pkt_addr = dma_addr;
4465                 rxe[i].mbuf = mbuf;
4466         }
4467
4468         return 0;
4469 }
4470
4471 static int
4472 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4473 {
4474         struct ixgbe_hw *hw;
4475         uint32_t mrqc;
4476
4477         ixgbe_rss_configure(dev);
4478
4479         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4480
4481         /* MRQC: enable VF RSS */
4482         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4483         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4484         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4485         case ETH_64_POOLS:
4486                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4487                 break;
4488
4489         case ETH_32_POOLS:
4490                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4491                 break;
4492
4493         default:
4494                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4495                 return -EINVAL;
4496         }
4497
4498         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4499
4500         return 0;
4501 }
4502
4503 static int
4504 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4505 {
4506         struct ixgbe_hw *hw =
4507                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4508
4509         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4510         case ETH_64_POOLS:
4511                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4512                         IXGBE_MRQC_VMDQEN);
4513                 break;
4514
4515         case ETH_32_POOLS:
4516                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4517                         IXGBE_MRQC_VMDQRT4TCEN);
4518                 break;
4519
4520         case ETH_16_POOLS:
4521                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4522                         IXGBE_MRQC_VMDQRT8TCEN);
4523                 break;
4524         default:
4525                 PMD_INIT_LOG(ERR,
4526                         "invalid pool number in IOV mode");
4527                 break;
4528         }
4529         return 0;
4530 }
4531
4532 static int
4533 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4534 {
4535         struct ixgbe_hw *hw =
4536                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4537
4538         if (hw->mac.type == ixgbe_mac_82598EB)
4539                 return 0;
4540
4541         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4542                 /*
4543                  * SRIOV inactive scheme
4544                  * any DCB/RSS w/o VMDq multi-queue setting
4545                  */
4546                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4547                 case ETH_MQ_RX_RSS:
4548                 case ETH_MQ_RX_DCB_RSS:
4549                 case ETH_MQ_RX_VMDQ_RSS:
4550                         ixgbe_rss_configure(dev);
4551                         break;
4552
4553                 case ETH_MQ_RX_VMDQ_DCB:
4554                         ixgbe_vmdq_dcb_configure(dev);
4555                         break;
4556
4557                 case ETH_MQ_RX_VMDQ_ONLY:
4558                         ixgbe_vmdq_rx_hw_configure(dev);
4559                         break;
4560
4561                 case ETH_MQ_RX_NONE:
4562                 default:
4563                         /* if mq_mode is none, disable rss mode.*/
4564                         ixgbe_rss_disable(dev);
4565                         break;
4566                 }
4567         } else {
4568                 /* SRIOV active scheme
4569                  * Support RSS together with SRIOV.
4570                  */
4571                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4572                 case ETH_MQ_RX_RSS:
4573                 case ETH_MQ_RX_VMDQ_RSS:
4574                         ixgbe_config_vf_rss(dev);
4575                         break;
4576                 case ETH_MQ_RX_VMDQ_DCB:
4577                 case ETH_MQ_RX_DCB:
4578                 /* In SRIOV, the configuration is the same as VMDq case */
4579                         ixgbe_vmdq_dcb_configure(dev);
4580                         break;
4581                 /* DCB/RSS together with SRIOV is not supported */
4582                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4583                 case ETH_MQ_RX_DCB_RSS:
4584                         PMD_INIT_LOG(ERR,
4585                                 "Could not support DCB/RSS with VMDq & SRIOV");
4586                         return -1;
4587                 default:
4588                         ixgbe_config_vf_default(dev);
4589                         break;
4590                 }
4591         }
4592
4593         return 0;
4594 }
4595
4596 static int
4597 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4598 {
4599         struct ixgbe_hw *hw =
4600                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4601         uint32_t mtqc;
4602         uint32_t rttdcs;
4603
4604         if (hw->mac.type == ixgbe_mac_82598EB)
4605                 return 0;
4606
4607         /* disable arbiter before setting MTQC */
4608         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4609         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4610         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4611
4612         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4613                 /*
4614                  * SRIOV inactive scheme
4615                  * any DCB w/o VMDq multi-queue setting
4616                  */
4617                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4618                         ixgbe_vmdq_tx_hw_configure(hw);
4619                 else {
4620                         mtqc = IXGBE_MTQC_64Q_1PB;
4621                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4622                 }
4623         } else {
4624                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4625
4626                 /*
4627                  * SRIOV active scheme
4628                  * FIXME if support DCB together with VMDq & SRIOV
4629                  */
4630                 case ETH_64_POOLS:
4631                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4632                         break;
4633                 case ETH_32_POOLS:
4634                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4635                         break;
4636                 case ETH_16_POOLS:
4637                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4638                                 IXGBE_MTQC_8TC_8TQ;
4639                         break;
4640                 default:
4641                         mtqc = IXGBE_MTQC_64Q_1PB;
4642                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4643                 }
4644                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4645         }
4646
4647         /* re-enable arbiter */
4648         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4649         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4650
4651         return 0;
4652 }
4653
4654 /**
4655  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4656  *
4657  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4658  * spec rev. 3.0 chapter 8.2.3.8.13.
4659  *
4660  * @pool Memory pool of the Rx queue
4661  */
4662 static inline uint32_t
4663 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4664 {
4665         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4666
4667         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4668         uint16_t maxdesc =
4669                 RTE_IPV4_MAX_PKT_LEN /
4670                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4671
4672         if (maxdesc >= 16)
4673                 return IXGBE_RSCCTL_MAXDESC_16;
4674         else if (maxdesc >= 8)
4675                 return IXGBE_RSCCTL_MAXDESC_8;
4676         else if (maxdesc >= 4)
4677                 return IXGBE_RSCCTL_MAXDESC_4;
4678         else
4679                 return IXGBE_RSCCTL_MAXDESC_1;
4680 }
4681
4682 /**
4683  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4684  * interrupt
4685  *
4686  * (Taken from FreeBSD tree)
4687  * (yes this is all very magic and confusing :)
4688  *
4689  * @dev port handle
4690  * @entry the register array entry
4691  * @vector the MSIX vector for this queue
4692  * @type RX/TX/MISC
4693  */
4694 static void
4695 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4696 {
4697         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4698         u32 ivar, index;
4699
4700         vector |= IXGBE_IVAR_ALLOC_VAL;
4701
4702         switch (hw->mac.type) {
4703
4704         case ixgbe_mac_82598EB:
4705                 if (type == -1)
4706                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4707                 else
4708                         entry += (type * 64);
4709                 index = (entry >> 2) & 0x1F;
4710                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4711                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4712                 ivar |= (vector << (8 * (entry & 0x3)));
4713                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4714                 break;
4715
4716         case ixgbe_mac_82599EB:
4717         case ixgbe_mac_X540:
4718                 if (type == -1) { /* MISC IVAR */
4719                         index = (entry & 1) * 8;
4720                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4721                         ivar &= ~(0xFF << index);
4722                         ivar |= (vector << index);
4723                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4724                 } else {        /* RX/TX IVARS */
4725                         index = (16 * (entry & 1)) + (8 * type);
4726                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4727                         ivar &= ~(0xFF << index);
4728                         ivar |= (vector << index);
4729                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4730                 }
4731
4732                 break;
4733
4734         default:
4735                 break;
4736         }
4737 }
4738
4739 void __rte_cold
4740 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4741 {
4742         uint16_t i, rx_using_sse;
4743         struct ixgbe_adapter *adapter = dev->data->dev_private;
4744
4745         /*
4746          * In order to allow Vector Rx there are a few configuration
4747          * conditions to be met and Rx Bulk Allocation should be allowed.
4748          */
4749         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4750             !adapter->rx_bulk_alloc_allowed ||
4751                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4752                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4753                                     "preconditions",
4754                              dev->data->port_id);
4755
4756                 adapter->rx_vec_allowed = false;
4757         }
4758
4759         /*
4760          * Initialize the appropriate LRO callback.
4761          *
4762          * If all queues satisfy the bulk allocation preconditions
4763          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4764          * Otherwise use a single allocation version.
4765          */
4766         if (dev->data->lro) {
4767                 if (adapter->rx_bulk_alloc_allowed) {
4768                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4769                                            "allocation version");
4770                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4771                 } else {
4772                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4773                                            "allocation version");
4774                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4775                 }
4776         } else if (dev->data->scattered_rx) {
4777                 /*
4778                  * Set the non-LRO scattered callback: there are Vector and
4779                  * single allocation versions.
4780                  */
4781                 if (adapter->rx_vec_allowed) {
4782                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4783                                             "callback (port=%d).",
4784                                      dev->data->port_id);
4785
4786                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4787                 } else if (adapter->rx_bulk_alloc_allowed) {
4788                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4789                                            "allocation callback (port=%d).",
4790                                      dev->data->port_id);
4791                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4792                 } else {
4793                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4794                                             "single allocation) "
4795                                             "Scattered Rx callback "
4796                                             "(port=%d).",
4797                                      dev->data->port_id);
4798
4799                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4800                 }
4801         /*
4802          * Below we set "simple" callbacks according to port/queues parameters.
4803          * If parameters allow we are going to choose between the following
4804          * callbacks:
4805          *    - Vector
4806          *    - Bulk Allocation
4807          *    - Single buffer allocation (the simplest one)
4808          */
4809         } else if (adapter->rx_vec_allowed) {
4810                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4811                                     "burst size no less than %d (port=%d).",
4812                              RTE_IXGBE_DESCS_PER_LOOP,
4813                              dev->data->port_id);
4814
4815                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4816         } else if (adapter->rx_bulk_alloc_allowed) {
4817                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4818                                     "satisfied. Rx Burst Bulk Alloc function "
4819                                     "will be used on port=%d.",
4820                              dev->data->port_id);
4821
4822                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4823         } else {
4824                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4825                                     "satisfied, or Scattered Rx is requested "
4826                                     "(port=%d).",
4827                              dev->data->port_id);
4828
4829                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4830         }
4831
4832         /* Propagate information about RX function choice through all queues. */
4833
4834         rx_using_sse =
4835                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4836                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4837
4838         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4839                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4840
4841                 rxq->rx_using_sse = rx_using_sse;
4842 #ifdef RTE_LIBRTE_SECURITY
4843                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4844                                 DEV_RX_OFFLOAD_SECURITY);
4845 #endif
4846         }
4847 }
4848
4849 /**
4850  * ixgbe_set_rsc - configure RSC related port HW registers
4851  *
4852  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4853  * of 82599 Spec (x540 configuration is virtually the same).
4854  *
4855  * @dev port handle
4856  *
4857  * Returns 0 in case of success or a non-zero error code
4858  */
4859 static int
4860 ixgbe_set_rsc(struct rte_eth_dev *dev)
4861 {
4862         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4863         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4864         struct rte_eth_dev_info dev_info = { 0 };
4865         bool rsc_capable = false;
4866         uint16_t i;
4867         uint32_t rdrxctl;
4868         uint32_t rfctl;
4869
4870         /* Sanity check */
4871         dev->dev_ops->dev_infos_get(dev, &dev_info);
4872         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4873                 rsc_capable = true;
4874
4875         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4876                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4877                                    "support it");
4878                 return -EINVAL;
4879         }
4880
4881         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4882
4883         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4884              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4885                 /*
4886                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4887                  * 3.0 RSC configuration requires HW CRC stripping being
4888                  * enabled. If user requested both HW CRC stripping off
4889                  * and RSC on - return an error.
4890                  */
4891                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4892                                     "is disabled");
4893                 return -EINVAL;
4894         }
4895
4896         /* RFCTL configuration  */
4897         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4898         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4899                 /*
4900                  * Since NFS packets coalescing is not supported - clear
4901                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4902                  * enabled.
4903                  */
4904                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4905                            IXGBE_RFCTL_NFSR_DIS);
4906         else
4907                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4908         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4909
4910         /* If LRO hasn't been requested - we are done here. */
4911         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4912                 return 0;
4913
4914         /* Set RDRXCTL.RSCACKC bit */
4915         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4916         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4917         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4918
4919         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4920         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4921                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4922                 uint32_t srrctl =
4923                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4924                 uint32_t rscctl =
4925                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4926                 uint32_t psrtype =
4927                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4928                 uint32_t eitr =
4929                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4930
4931                 /*
4932                  * ixgbe PMD doesn't support header-split at the moment.
4933                  *
4934                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4935                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4936                  * should be configured even if header split is not
4937                  * enabled. We will configure it 128 bytes following the
4938                  * recommendation in the spec.
4939                  */
4940                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4941                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4942                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4943
4944                 /*
4945                  * TODO: Consider setting the Receive Descriptor Minimum
4946                  * Threshold Size for an RSC case. This is not an obviously
4947                  * beneficiary option but the one worth considering...
4948                  */
4949
4950                 rscctl |= IXGBE_RSCCTL_RSCEN;
4951                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4952                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4953
4954                 /*
4955                  * RSC: Set ITR interval corresponding to 2K ints/s.
4956                  *
4957                  * Full-sized RSC aggregations for a 10Gb/s link will
4958                  * arrive at about 20K aggregation/s rate.
4959                  *
4960                  * 2K inst/s rate will make only 10% of the
4961                  * aggregations to be closed due to the interrupt timer
4962                  * expiration for a streaming at wire-speed case.
4963                  *
4964                  * For a sparse streaming case this setting will yield
4965                  * at most 500us latency for a single RSC aggregation.
4966                  */
4967                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4968                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4969                 eitr |= IXGBE_EITR_CNT_WDIS;
4970
4971                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4972                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4973                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4974                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4975
4976                 /*
4977                  * RSC requires the mapping of the queue to the
4978                  * interrupt vector.
4979                  */
4980                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4981         }
4982
4983         dev->data->lro = 1;
4984
4985         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4986
4987         return 0;
4988 }
4989
4990 /*
4991  * Initializes Receive Unit.
4992  */
4993 int __rte_cold
4994 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4995 {
4996         struct ixgbe_hw     *hw;
4997         struct ixgbe_rx_queue *rxq;
4998         uint64_t bus_addr;
4999         uint32_t rxctrl;
5000         uint32_t fctrl;
5001         uint32_t hlreg0;
5002         uint32_t maxfrs;
5003         uint32_t srrctl;
5004         uint32_t rdrxctl;
5005         uint32_t rxcsum;
5006         uint16_t buf_size;
5007         uint16_t i;
5008         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5009         int rc;
5010
5011         PMD_INIT_FUNC_TRACE();
5012         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5013
5014         /*
5015          * Make sure receives are disabled while setting
5016          * up the RX context (registers, descriptor rings, etc.).
5017          */
5018         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5019         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5020
5021         /* Enable receipt of broadcasted frames */
5022         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5023         fctrl |= IXGBE_FCTRL_BAM;
5024         fctrl |= IXGBE_FCTRL_DPF;
5025         fctrl |= IXGBE_FCTRL_PMCF;
5026         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5027
5028         /*
5029          * Configure CRC stripping, if any.
5030          */
5031         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5032         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5033                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5034         else
5035                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5036
5037         /*
5038          * Configure jumbo frame support, if any.
5039          */
5040         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5041                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5042                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5043                 maxfrs &= 0x0000FFFF;
5044                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5045                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5046         } else
5047                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5048
5049         /*
5050          * If loopback mode is configured, set LPBK bit.
5051          */
5052         if (dev->data->dev_conf.lpbk_mode != 0) {
5053                 rc = ixgbe_check_supported_loopback_mode(dev);
5054                 if (rc < 0) {
5055                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5056                         return rc;
5057                 }
5058                 hlreg0 |= IXGBE_HLREG0_LPBK;
5059         } else {
5060                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5061         }
5062
5063         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5064
5065         /*
5066          * Assume no header split and no VLAN strip support
5067          * on any Rx queue first .
5068          */
5069         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5070         /* Setup RX queues */
5071         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5072                 rxq = dev->data->rx_queues[i];
5073
5074                 /*
5075                  * Reset crc_len in case it was changed after queue setup by a
5076                  * call to configure.
5077                  */
5078                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5079                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5080                 else
5081                         rxq->crc_len = 0;
5082
5083                 /* Setup the Base and Length of the Rx Descriptor Rings */
5084                 bus_addr = rxq->rx_ring_phys_addr;
5085                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5086                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5087                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5088                                 (uint32_t)(bus_addr >> 32));
5089                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5090                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5091                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5092                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5093
5094                 /* Configure the SRRCTL register */
5095                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5096
5097                 /* Set if packets are dropped when no descriptors available */
5098                 if (rxq->drop_en)
5099                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5100
5101                 /*
5102                  * Configure the RX buffer size in the BSIZEPACKET field of
5103                  * the SRRCTL register of the queue.
5104                  * The value is in 1 KB resolution. Valid values can be from
5105                  * 1 KB to 16 KB.
5106                  */
5107                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5108                         RTE_PKTMBUF_HEADROOM);
5109                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5110                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5111
5112                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5113
5114                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5115                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5116
5117                 /* It adds dual VLAN length for supporting dual VLAN */
5118                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5119                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5120                         dev->data->scattered_rx = 1;
5121                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5122                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5123         }
5124
5125         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5126                 dev->data->scattered_rx = 1;
5127
5128         /*
5129          * Device configured with multiple RX queues.
5130          */
5131         ixgbe_dev_mq_rx_configure(dev);
5132
5133         /*
5134          * Setup the Checksum Register.
5135          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5136          * Enable IP/L4 checkum computation by hardware if requested to do so.
5137          */
5138         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5139         rxcsum |= IXGBE_RXCSUM_PCSD;
5140         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5141                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5142         else
5143                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5144
5145         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5146
5147         if (hw->mac.type == ixgbe_mac_82599EB ||
5148             hw->mac.type == ixgbe_mac_X540) {
5149                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5150                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5151                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5152                 else
5153                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5154                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5155                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5156         }
5157
5158         rc = ixgbe_set_rsc(dev);
5159         if (rc)
5160                 return rc;
5161
5162         ixgbe_set_rx_function(dev);
5163
5164         return 0;
5165 }
5166
5167 /*
5168  * Initializes Transmit Unit.
5169  */
5170 void __rte_cold
5171 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5172 {
5173         struct ixgbe_hw     *hw;
5174         struct ixgbe_tx_queue *txq;
5175         uint64_t bus_addr;
5176         uint32_t hlreg0;
5177         uint32_t txctrl;
5178         uint16_t i;
5179
5180         PMD_INIT_FUNC_TRACE();
5181         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5182
5183         /* Enable TX CRC (checksum offload requirement) and hw padding
5184          * (TSO requirement)
5185          */
5186         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5187         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5188         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5189
5190         /* Setup the Base and Length of the Tx Descriptor Rings */
5191         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5192                 txq = dev->data->tx_queues[i];
5193
5194                 bus_addr = txq->tx_ring_phys_addr;
5195                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5196                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5197                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5198                                 (uint32_t)(bus_addr >> 32));
5199                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5200                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5201                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5202                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5203                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5204
5205                 /*
5206                  * Disable Tx Head Writeback RO bit, since this hoses
5207                  * bookkeeping if things aren't delivered in order.
5208                  */
5209                 switch (hw->mac.type) {
5210                 case ixgbe_mac_82598EB:
5211                         txctrl = IXGBE_READ_REG(hw,
5212                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5213                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5214                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5215                                         txctrl);
5216                         break;
5217
5218                 case ixgbe_mac_82599EB:
5219                 case ixgbe_mac_X540:
5220                 case ixgbe_mac_X550:
5221                 case ixgbe_mac_X550EM_x:
5222                 case ixgbe_mac_X550EM_a:
5223                 default:
5224                         txctrl = IXGBE_READ_REG(hw,
5225                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5226                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5227                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5228                                         txctrl);
5229                         break;
5230                 }
5231         }
5232
5233         /* Device configured with multiple TX queues. */
5234         ixgbe_dev_mq_tx_configure(dev);
5235 }
5236
5237 /*
5238  * Check if requested loopback mode is supported
5239  */
5240 int
5241 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5242 {
5243         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5244
5245         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5246                 if (hw->mac.type == ixgbe_mac_82599EB ||
5247                      hw->mac.type == ixgbe_mac_X540 ||
5248                      hw->mac.type == ixgbe_mac_X550 ||
5249                      hw->mac.type == ixgbe_mac_X550EM_x ||
5250                      hw->mac.type == ixgbe_mac_X550EM_a)
5251                         return 0;
5252
5253         return -ENOTSUP;
5254 }
5255
5256 /*
5257  * Set up link for 82599 loopback mode Tx->Rx.
5258  */
5259 static inline void __rte_cold
5260 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5261 {
5262         PMD_INIT_FUNC_TRACE();
5263
5264         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5265                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5266                                 IXGBE_SUCCESS) {
5267                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5268                         /* ignore error */
5269                         return;
5270                 }
5271         }
5272
5273         /* Restart link */
5274         IXGBE_WRITE_REG(hw,
5275                         IXGBE_AUTOC,
5276                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5277         ixgbe_reset_pipeline_82599(hw);
5278
5279         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5280         msec_delay(50);
5281 }
5282
5283
5284 /*
5285  * Start Transmit and Receive Units.
5286  */
5287 int __rte_cold
5288 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5289 {
5290         struct ixgbe_hw     *hw;
5291         struct ixgbe_tx_queue *txq;
5292         struct ixgbe_rx_queue *rxq;
5293         uint32_t txdctl;
5294         uint32_t dmatxctl;
5295         uint32_t rxctrl;
5296         uint16_t i;
5297         int ret = 0;
5298
5299         PMD_INIT_FUNC_TRACE();
5300         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5301
5302         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5303                 txq = dev->data->tx_queues[i];
5304                 /* Setup Transmit Threshold Registers */
5305                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5306                 txdctl |= txq->pthresh & 0x7F;
5307                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5308                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5309                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5310         }
5311
5312         if (hw->mac.type != ixgbe_mac_82598EB) {
5313                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5314                 dmatxctl |= IXGBE_DMATXCTL_TE;
5315                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5316         }
5317
5318         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5319                 txq = dev->data->tx_queues[i];
5320                 if (!txq->tx_deferred_start) {
5321                         ret = ixgbe_dev_tx_queue_start(dev, i);
5322                         if (ret < 0)
5323                                 return ret;
5324                 }
5325         }
5326
5327         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5328                 rxq = dev->data->rx_queues[i];
5329                 if (!rxq->rx_deferred_start) {
5330                         ret = ixgbe_dev_rx_queue_start(dev, i);
5331                         if (ret < 0)
5332                                 return ret;
5333                 }
5334         }
5335
5336         /* Enable Receive engine */
5337         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5338         if (hw->mac.type == ixgbe_mac_82598EB)
5339                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5340         rxctrl |= IXGBE_RXCTRL_RXEN;
5341         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5342
5343         /* If loopback mode is enabled, set up the link accordingly */
5344         if (dev->data->dev_conf.lpbk_mode != 0) {
5345                 if (hw->mac.type == ixgbe_mac_82599EB)
5346                         ixgbe_setup_loopback_link_82599(hw);
5347                 else if (hw->mac.type == ixgbe_mac_X540 ||
5348                      hw->mac.type == ixgbe_mac_X550 ||
5349                      hw->mac.type == ixgbe_mac_X550EM_x ||
5350                      hw->mac.type == ixgbe_mac_X550EM_a)
5351                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5352         }
5353
5354 #ifdef RTE_LIBRTE_SECURITY
5355         if ((dev->data->dev_conf.rxmode.offloads &
5356                         DEV_RX_OFFLOAD_SECURITY) ||
5357                 (dev->data->dev_conf.txmode.offloads &
5358                         DEV_TX_OFFLOAD_SECURITY)) {
5359                 ret = ixgbe_crypto_enable_ipsec(dev);
5360                 if (ret != 0) {
5361                         PMD_DRV_LOG(ERR,
5362                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5363                                     ret);
5364                         return ret;
5365                 }
5366         }
5367 #endif
5368
5369         return 0;
5370 }
5371
5372 /*
5373  * Start Receive Units for specified queue.
5374  */
5375 int __rte_cold
5376 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5377 {
5378         struct ixgbe_hw     *hw;
5379         struct ixgbe_rx_queue *rxq;
5380         uint32_t rxdctl;
5381         int poll_ms;
5382
5383         PMD_INIT_FUNC_TRACE();
5384         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5385
5386         rxq = dev->data->rx_queues[rx_queue_id];
5387
5388         /* Allocate buffers for descriptor rings */
5389         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5390                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5391                              rx_queue_id);
5392                 return -1;
5393         }
5394         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5395         rxdctl |= IXGBE_RXDCTL_ENABLE;
5396         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5397
5398         /* Wait until RX Enable ready */
5399         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5400         do {
5401                 rte_delay_ms(1);
5402                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5403         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5404         if (!poll_ms)
5405                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5406         rte_wmb();
5407         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5408         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5409         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5410
5411         return 0;
5412 }
5413
5414 /*
5415  * Stop Receive Units for specified queue.
5416  */
5417 int __rte_cold
5418 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5419 {
5420         struct ixgbe_hw     *hw;
5421         struct ixgbe_adapter *adapter = dev->data->dev_private;
5422         struct ixgbe_rx_queue *rxq;
5423         uint32_t rxdctl;
5424         int poll_ms;
5425
5426         PMD_INIT_FUNC_TRACE();
5427         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5428
5429         rxq = dev->data->rx_queues[rx_queue_id];
5430
5431         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5432         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5433         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5434
5435         /* Wait until RX Enable bit clear */
5436         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5437         do {
5438                 rte_delay_ms(1);
5439                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5440         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5441         if (!poll_ms)
5442                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5443
5444         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5445
5446         ixgbe_rx_queue_release_mbufs(rxq);
5447         ixgbe_reset_rx_queue(adapter, rxq);
5448         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5449
5450         return 0;
5451 }
5452
5453
5454 /*
5455  * Start Transmit Units for specified queue.
5456  */
5457 int __rte_cold
5458 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5459 {
5460         struct ixgbe_hw     *hw;
5461         struct ixgbe_tx_queue *txq;
5462         uint32_t txdctl;
5463         int poll_ms;
5464
5465         PMD_INIT_FUNC_TRACE();
5466         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5467
5468         txq = dev->data->tx_queues[tx_queue_id];
5469         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5470         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5471         txdctl |= IXGBE_TXDCTL_ENABLE;
5472         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5473
5474         /* Wait until TX Enable ready */
5475         if (hw->mac.type == ixgbe_mac_82599EB) {
5476                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5477                 do {
5478                         rte_delay_ms(1);
5479                         txdctl = IXGBE_READ_REG(hw,
5480                                 IXGBE_TXDCTL(txq->reg_idx));
5481                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5482                 if (!poll_ms)
5483                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5484                                 tx_queue_id);
5485         }
5486         rte_wmb();
5487         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5488         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5489
5490         return 0;
5491 }
5492
5493 /*
5494  * Stop Transmit Units for specified queue.
5495  */
5496 int __rte_cold
5497 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5498 {
5499         struct ixgbe_hw     *hw;
5500         struct ixgbe_tx_queue *txq;
5501         uint32_t txdctl;
5502         uint32_t txtdh, txtdt;
5503         int poll_ms;
5504
5505         PMD_INIT_FUNC_TRACE();
5506         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5507
5508         txq = dev->data->tx_queues[tx_queue_id];
5509
5510         /* Wait until TX queue is empty */
5511         if (hw->mac.type == ixgbe_mac_82599EB) {
5512                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5513                 do {
5514                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5515                         txtdh = IXGBE_READ_REG(hw,
5516                                                IXGBE_TDH(txq->reg_idx));
5517                         txtdt = IXGBE_READ_REG(hw,
5518                                                IXGBE_TDT(txq->reg_idx));
5519                 } while (--poll_ms && (txtdh != txtdt));
5520                 if (!poll_ms)
5521                         PMD_INIT_LOG(ERR,
5522                                 "Tx Queue %d is not empty when stopping.",
5523                                 tx_queue_id);
5524         }
5525
5526         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5527         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5528         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5529
5530         /* Wait until TX Enable bit clear */
5531         if (hw->mac.type == ixgbe_mac_82599EB) {
5532                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5533                 do {
5534                         rte_delay_ms(1);
5535                         txdctl = IXGBE_READ_REG(hw,
5536                                                 IXGBE_TXDCTL(txq->reg_idx));
5537                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5538                 if (!poll_ms)
5539                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5540                                 tx_queue_id);
5541         }
5542
5543         if (txq->ops != NULL) {
5544                 txq->ops->release_mbufs(txq);
5545                 txq->ops->reset(txq);
5546         }
5547         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5548
5549         return 0;
5550 }
5551
5552 void
5553 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5554         struct rte_eth_rxq_info *qinfo)
5555 {
5556         struct ixgbe_rx_queue *rxq;
5557
5558         rxq = dev->data->rx_queues[queue_id];
5559
5560         qinfo->mp = rxq->mb_pool;
5561         qinfo->scattered_rx = dev->data->scattered_rx;
5562         qinfo->nb_desc = rxq->nb_rx_desc;
5563
5564         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5565         qinfo->conf.rx_drop_en = rxq->drop_en;
5566         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5567         qinfo->conf.offloads = rxq->offloads;
5568 }
5569
5570 void
5571 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5572         struct rte_eth_txq_info *qinfo)
5573 {
5574         struct ixgbe_tx_queue *txq;
5575
5576         txq = dev->data->tx_queues[queue_id];
5577
5578         qinfo->nb_desc = txq->nb_tx_desc;
5579
5580         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5581         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5582         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5583
5584         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5585         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5586         qinfo->conf.offloads = txq->offloads;
5587         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5588 }
5589
5590 /*
5591  * [VF] Initializes Receive Unit.
5592  */
5593 int __rte_cold
5594 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5595 {
5596         struct ixgbe_hw     *hw;
5597         struct ixgbe_rx_queue *rxq;
5598         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5599         uint64_t bus_addr;
5600         uint32_t srrctl, psrtype = 0;
5601         uint16_t buf_size;
5602         uint16_t i;
5603         int ret;
5604
5605         PMD_INIT_FUNC_TRACE();
5606         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5607
5608         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5609                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5610                         "it should be power of 2");
5611                 return -1;
5612         }
5613
5614         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5615                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5616                         "it should be equal to or less than %d",
5617                         hw->mac.max_rx_queues);
5618                 return -1;
5619         }
5620
5621         /*
5622          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5623          * disables the VF receipt of packets if the PF MTU is > 1500.
5624          * This is done to deal with 82599 limitations that imposes
5625          * the PF and all VFs to share the same MTU.
5626          * Then, the PF driver enables again the VF receipt of packet when
5627          * the VF driver issues a IXGBE_VF_SET_LPE request.
5628          * In the meantime, the VF device cannot be used, even if the VF driver
5629          * and the Guest VM network stack are ready to accept packets with a
5630          * size up to the PF MTU.
5631          * As a work-around to this PF behaviour, force the call to
5632          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5633          * VF packets received can work in all cases.
5634          */
5635         ixgbevf_rlpml_set_vf(hw,
5636                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5637
5638         /*
5639          * Assume no header split and no VLAN strip support
5640          * on any Rx queue first .
5641          */
5642         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5643         /* Setup RX queues */
5644         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5645                 rxq = dev->data->rx_queues[i];
5646
5647                 /* Allocate buffers for descriptor rings */
5648                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5649                 if (ret)
5650                         return ret;
5651
5652                 /* Setup the Base and Length of the Rx Descriptor Rings */
5653                 bus_addr = rxq->rx_ring_phys_addr;
5654
5655                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5656                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5657                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5658                                 (uint32_t)(bus_addr >> 32));
5659                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5660                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5661                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5662                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5663
5664
5665                 /* Configure the SRRCTL register */
5666                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5667
5668                 /* Set if packets are dropped when no descriptors available */
5669                 if (rxq->drop_en)
5670                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5671
5672                 /*
5673                  * Configure the RX buffer size in the BSIZEPACKET field of
5674                  * the SRRCTL register of the queue.
5675                  * The value is in 1 KB resolution. Valid values can be from
5676                  * 1 KB to 16 KB.
5677                  */
5678                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5679                         RTE_PKTMBUF_HEADROOM);
5680                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5681                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5682
5683                 /*
5684                  * VF modification to write virtual function SRRCTL register
5685                  */
5686                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5687
5688                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5689                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5690
5691                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5692                     /* It adds dual VLAN length for supporting dual VLAN */
5693                     (rxmode->max_rx_pkt_len +
5694                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5695                         if (!dev->data->scattered_rx)
5696                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5697                         dev->data->scattered_rx = 1;
5698                 }
5699
5700                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5701                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5702         }
5703
5704         /* Set RQPL for VF RSS according to max Rx queue */
5705         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5706                 IXGBE_PSRTYPE_RQPL_SHIFT;
5707         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5708
5709         ixgbe_set_rx_function(dev);
5710
5711         return 0;
5712 }
5713
5714 /*
5715  * [VF] Initializes Transmit Unit.
5716  */
5717 void __rte_cold
5718 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5719 {
5720         struct ixgbe_hw     *hw;
5721         struct ixgbe_tx_queue *txq;
5722         uint64_t bus_addr;
5723         uint32_t txctrl;
5724         uint16_t i;
5725
5726         PMD_INIT_FUNC_TRACE();
5727         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5728
5729         /* Setup the Base and Length of the Tx Descriptor Rings */
5730         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5731                 txq = dev->data->tx_queues[i];
5732                 bus_addr = txq->tx_ring_phys_addr;
5733                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5734                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5735                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5736                                 (uint32_t)(bus_addr >> 32));
5737                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5738                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5739                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5740                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5741                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5742
5743                 /*
5744                  * Disable Tx Head Writeback RO bit, since this hoses
5745                  * bookkeeping if things aren't delivered in order.
5746                  */
5747                 txctrl = IXGBE_READ_REG(hw,
5748                                 IXGBE_VFDCA_TXCTRL(i));
5749                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5750                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5751                                 txctrl);
5752         }
5753 }
5754
5755 /*
5756  * [VF] Start Transmit and Receive Units.
5757  */
5758 void __rte_cold
5759 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5760 {
5761         struct ixgbe_hw     *hw;
5762         struct ixgbe_tx_queue *txq;
5763         struct ixgbe_rx_queue *rxq;
5764         uint32_t txdctl;
5765         uint32_t rxdctl;
5766         uint16_t i;
5767         int poll_ms;
5768
5769         PMD_INIT_FUNC_TRACE();
5770         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5771
5772         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5773                 txq = dev->data->tx_queues[i];
5774                 /* Setup Transmit Threshold Registers */
5775                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5776                 txdctl |= txq->pthresh & 0x7F;
5777                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5778                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5779                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5780         }
5781
5782         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5783
5784                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5785                 txdctl |= IXGBE_TXDCTL_ENABLE;
5786                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5787
5788                 poll_ms = 10;
5789                 /* Wait until TX Enable ready */
5790                 do {
5791                         rte_delay_ms(1);
5792                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5793                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5794                 if (!poll_ms)
5795                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5796         }
5797         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5798
5799                 rxq = dev->data->rx_queues[i];
5800
5801                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5802                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5803                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5804
5805                 /* Wait until RX Enable ready */
5806                 poll_ms = 10;
5807                 do {
5808                         rte_delay_ms(1);
5809                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5810                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5811                 if (!poll_ms)
5812                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5813                 rte_wmb();
5814                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5815
5816         }
5817 }
5818
5819 int
5820 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5821                     const struct rte_flow_action_rss *in)
5822 {
5823         if (in->key_len > RTE_DIM(out->key) ||
5824             in->queue_num > RTE_DIM(out->queue))
5825                 return -EINVAL;
5826         out->conf = (struct rte_flow_action_rss){
5827                 .func = in->func,
5828                 .level = in->level,
5829                 .types = in->types,
5830                 .key_len = in->key_len,
5831                 .queue_num = in->queue_num,
5832                 .key = memcpy(out->key, in->key, in->key_len),
5833                 .queue = memcpy(out->queue, in->queue,
5834                                 sizeof(*in->queue) * in->queue_num),
5835         };
5836         return 0;
5837 }
5838
5839 int
5840 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5841                       const struct rte_flow_action_rss *with)
5842 {
5843         return (comp->func == with->func &&
5844                 comp->level == with->level &&
5845                 comp->types == with->types &&
5846                 comp->key_len == with->key_len &&
5847                 comp->queue_num == with->queue_num &&
5848                 !memcmp(comp->key, with->key, with->key_len) &&
5849                 !memcmp(comp->queue, with->queue,
5850                         sizeof(*with->queue) * with->queue_num));
5851 }
5852
5853 int
5854 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5855                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5856 {
5857         struct ixgbe_hw *hw;
5858         uint32_t reta;
5859         uint16_t i;
5860         uint16_t j;
5861         uint16_t sp_reta_size;
5862         uint32_t reta_reg;
5863         struct rte_eth_rss_conf rss_conf = {
5864                 .rss_key = conf->conf.key_len ?
5865                         (void *)(uintptr_t)conf->conf.key : NULL,
5866                 .rss_key_len = conf->conf.key_len,
5867                 .rss_hf = conf->conf.types,
5868         };
5869         struct ixgbe_filter_info *filter_info =
5870                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5871
5872         PMD_INIT_FUNC_TRACE();
5873         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5874
5875         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5876
5877         if (!add) {
5878                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5879                                           &conf->conf)) {
5880                         ixgbe_rss_disable(dev);
5881                         memset(&filter_info->rss_info, 0,
5882                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5883                         return 0;
5884                 }
5885                 return -EINVAL;
5886         }
5887
5888         if (filter_info->rss_info.conf.queue_num)
5889                 return -EINVAL;
5890         /* Fill in redirection table
5891          * The byte-swap is needed because NIC registers are in
5892          * little-endian order.
5893          */
5894         reta = 0;
5895         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5896                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5897
5898                 if (j == conf->conf.queue_num)
5899                         j = 0;
5900                 reta = (reta << 8) | conf->conf.queue[j];
5901                 if ((i & 3) == 3)
5902                         IXGBE_WRITE_REG(hw, reta_reg,
5903                                         rte_bswap32(reta));
5904         }
5905
5906         /* Configure the RSS key and the RSS protocols used to compute
5907          * the RSS hash of input packets.
5908          */
5909         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5910                 ixgbe_rss_disable(dev);
5911                 return 0;
5912         }
5913         if (rss_conf.rss_key == NULL)
5914                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5915         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5916
5917         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5918                 return -EINVAL;
5919
5920         return 0;
5921 }
5922
5923 /* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
5924 #if defined(RTE_ARCH_PPC_64)
5925 int
5926 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5927 {
5928         return -1;
5929 }
5930
5931 uint16_t
5932 ixgbe_recv_pkts_vec(
5933         void __rte_unused *rx_queue,
5934         struct rte_mbuf __rte_unused **rx_pkts,
5935         uint16_t __rte_unused nb_pkts)
5936 {
5937         return 0;
5938 }
5939
5940 uint16_t
5941 ixgbe_recv_scattered_pkts_vec(
5942         void __rte_unused *rx_queue,
5943         struct rte_mbuf __rte_unused **rx_pkts,
5944         uint16_t __rte_unused nb_pkts)
5945 {
5946         return 0;
5947 }
5948
5949 int
5950 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5951 {
5952         return -1;
5953 }
5954
5955 uint16_t
5956 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5957                 struct rte_mbuf __rte_unused **tx_pkts,
5958                 uint16_t __rte_unused nb_pkts)
5959 {
5960         return 0;
5961 }
5962
5963 int
5964 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5965 {
5966         return -1;
5967 }
5968
5969 void
5970 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5971 {
5972         return;
5973 }
5974 #endif