net: add macro for VLAN header length
[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST RTE_MBUF_F_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask to indicate which bits are required for building the TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_OUTER_IPV6 |                \
63                 RTE_MBUF_F_TX_OUTER_IPV4 |               \
64                 RTE_MBUF_F_TX_IPV6 |                     \
65                 RTE_MBUF_F_TX_IPV4 |                     \
66                 RTE_MBUF_F_TX_VLAN |             \
67                 RTE_MBUF_F_TX_IP_CKSUM |                 \
68                 RTE_MBUF_F_TX_L4_MASK |          \
69                 RTE_MBUF_F_TX_TCP_SEG |          \
70                 RTE_MBUF_F_TX_MACSEC |                   \
71                 RTE_MBUF_F_TX_OUTER_IP_CKSUM |           \
72                 RTE_MBUF_F_TX_SEC_OFFLOAD |      \
73                 IXGBE_TX_IEEE1588_TMST)
74
75 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
76                 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
77
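/*
 * Illustrative check (mirrors the use in ixgbe_prep_pkts() below, with a
 * hypothetical mbuf "mb"): any requested offload flag outside
 * IXGBE_TX_OFFLOAD_MASK is unsupported by this PMD and is rejected, e.g.
 *
 *   if (mb->ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK)
 *           rte_errno = ENOTSUP;
 */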
78 #if 1
79 #define RTE_PMD_USE_PREFETCH
80 #endif
81
82 #ifdef RTE_PMD_USE_PREFETCH
83 /*
84  * Prefetch a cache line into all cache levels.
85  */
86 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
87 #else
88 #define rte_ixgbe_prefetch(p)   do {} while (0)
89 #endif
90
91 /*********************************************************************
92  *
93  *  TX functions
94  *
95  **********************************************************************/
96
97 /*
98  * Check for descriptors with their DD bit set and free mbufs.
99  * Return the total number of buffers freed.
100  */
101 static __rte_always_inline int
102 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
103 {
104         struct ixgbe_tx_entry *txep;
105         uint32_t status;
106         int i, nb_free = 0;
107         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
108
109         /* check DD bit on threshold descriptor */
110         status = txq->tx_ring[txq->tx_next_dd].wb.status;
111         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
112                 return 0;
113
114         /*
115          * first buffer to free from S/W ring is at index
116          * tx_next_dd - (tx_rs_thresh-1)
117          */
118         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
119
120         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
121                 /* free buffers one at a time */
122                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
123                 txep->mbuf = NULL;
124
125                 if (unlikely(m == NULL))
126                         continue;
127
128                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
129                     (nb_free > 0 && m->pool != free[0]->pool)) {
130                         rte_mempool_put_bulk(free[0]->pool,
131                                              (void **)free, nb_free);
132                         nb_free = 0;
133                 }
134
135                 free[nb_free++] = m;
136         }
137
138         if (nb_free > 0)
139                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
140
141         /* buffers were freed, update counters */
142         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
143         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
144         if (txq->tx_next_dd >= txq->nb_tx_desc)
145                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
146
147         return txq->tx_rs_thresh;
148 }
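/*
 * Worked example (a sketch assuming the default tx_rs_thresh of 32 and a
 * 512-entry ring): when the DD bit is seen on descriptor tx_next_dd == 31,
 * the loop walks sw_ring[0..31], batches mbufs that share a mempool and
 * returns each batch with a single rte_mempool_put_bulk() call of at most
 * RTE_IXGBE_TX_MAX_FREE_BUF_SZ pointers.  nb_tx_free then grows by 32 and
 * tx_next_dd advances to 63, wrapping back to tx_rs_thresh - 1 once it
 * passes the end of the ring.
 */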
149
150 /* Populate 4 descriptors with data from 4 mbufs */
151 static inline void
152 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
153 {
154         uint64_t buf_dma_addr;
155         uint32_t pkt_len;
156         int i;
157
158         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
159                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
160                 pkt_len = (*pkts)->data_len;
161
162                 /* write data to descriptor */
163                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
164
165                 txdp->read.cmd_type_len =
166                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
167
168                 txdp->read.olinfo_status =
169                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
170
171                 rte_prefetch0(&(*pkts)->pool);
172         }
173 }
174
175 /* Populate 1 descriptor with data from 1 mbuf */
176 static inline void
177 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
178 {
179         uint64_t buf_dma_addr;
180         uint32_t pkt_len;
181
182         buf_dma_addr = rte_mbuf_data_iova(*pkts);
183         pkt_len = (*pkts)->data_len;
184
185         /* write data to descriptor */
186         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187         txdp->read.cmd_type_len =
188                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
189         txdp->read.olinfo_status =
190                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
191         rte_prefetch0(&(*pkts)->pool);
192 }
193
194 /*
195  * Fill H/W descriptor ring with mbuf data.
196  * Copy mbuf pointers to the S/W ring.
197  */
198 static inline void
199 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
200                       uint16_t nb_pkts)
201 {
202         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
203         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
204         const int N_PER_LOOP = 4;
205         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
206         int mainpart, leftover;
207         int i, j;
208
209         /*
210          * Process most of the packets in chunks of N pkts.  Any
211          * leftover packets will get processed one at a time.
212          */
213         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
214         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
215         for (i = 0; i < mainpart; i += N_PER_LOOP) {
216                 /* Copy N mbuf pointers to the S/W ring */
217                 for (j = 0; j < N_PER_LOOP; ++j) {
218                         (txep + i + j)->mbuf = *(pkts + i + j);
219                 }
220                 tx4(txdp + i, pkts + i);
221         }
222
223         if (unlikely(leftover > 0)) {
224                 for (i = 0; i < leftover; ++i) {
225                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
226                         tx1(txdp + mainpart + i, pkts + mainpart + i);
227                 }
228         }
229 }
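/*
 * Worked example (illustrative): for nb_pkts == 13, mainpart is
 * 13 & ~3 == 12 and leftover is 13 & 3 == 1, so the loop issues three
 * tx4() calls covering packets 0-11 and a single tx1() call for packet 12.
 */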
230
231 static inline uint16_t
232 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
233              uint16_t nb_pkts)
234 {
235         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
236         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
237         uint16_t n = 0;
238
239         /*
240          * Begin scanning the H/W ring for done descriptors when the
241          * number of available descriptors drops below tx_free_thresh.  For
242          * each done descriptor, free the associated buffer.
243          */
244         if (txq->nb_tx_free < txq->tx_free_thresh)
245                 ixgbe_tx_free_bufs(txq);
246
247         /* Only use descriptors that are available */
248         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
249         if (unlikely(nb_pkts == 0))
250                 return 0;
251
252         /* Use exactly nb_pkts descriptors */
253         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
254
255         /*
256          * At this point, we know there are enough descriptors in the
257          * ring to transmit all the packets.  This assumes that each
258          * mbuf contains a single segment, and that no new offloads
259          * are expected, which would require a new context descriptor.
260          */
261
262         /*
263          * See if we're going to wrap-around. If so, handle the top
264          * of the descriptor ring first, then do the bottom.  If not,
265          * the processing looks just like the "bottom" part anyway...
266          */
267         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
268                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
269                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
270
271                 /*
272                  * We know that the last descriptor in the ring will need to
273                  * have its RS bit set because tx_rs_thresh has to be
274                  * a divisor of the ring size
275                  */
276                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
277                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
278                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
279
280                 txq->tx_tail = 0;
281         }
282
283         /* Fill H/W descriptor ring with mbuf data */
284         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
285         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
286
287         /*
288          * Determine if RS bit should be set
289          * This is what we actually want:
290          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
291          * but instead of subtracting 1 and doing >=, we can just do
292          * greater than without subtracting.
293          */
294         if (txq->tx_tail > txq->tx_next_rs) {
295                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
296                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
297                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
298                                                 txq->tx_rs_thresh);
299                 if (txq->tx_next_rs >= txq->nb_tx_desc)
300                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
301         }
302
303         /*
304          * Check for wrap-around. This would only happen if we used
305          * up to the last descriptor in the ring, no more, no less.
306          */
307         if (txq->tx_tail >= txq->nb_tx_desc)
308                 txq->tx_tail = 0;
309
310         /* update tail pointer */
311         rte_wmb();
312         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
313
314         return nb_pkts;
315 }
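/*
 * Wrap-around example (a sketch assuming a 512-entry ring): with
 * tx_tail == 500 and nb_pkts == 20, the first ixgbe_tx_fill_hw_ring() call
 * writes n == 12 descriptors into slots 500-511, the RS bit is requested
 * on the last descriptor of the ring (as the comment above notes), and
 * tx_tail is reset to 0.  The remaining 8 packets then land in slots 0-7,
 * leaving tx_tail == 8.
 */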
316
317 uint16_t
318 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
319                        uint16_t nb_pkts)
320 {
321         uint16_t nb_tx;
322
323         /* Transmit the burst directly if it fits within TX_MAX_BURST pkts */
324         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
325                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
326
327         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
328         nb_tx = 0;
329         while (nb_pkts) {
330                 uint16_t ret, n;
331
332                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
333                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
334                 nb_tx = (uint16_t)(nb_tx + ret);
335                 nb_pkts = (uint16_t)(nb_pkts - ret);
336                 if (ret < n)
337                         break;
338         }
339
340         return nb_tx;
341 }
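/*
 * Example (illustrative): with the default RTE_PMD_IXGBE_TX_MAX_BURST of
 * 32, a burst of 100 packets is handed to tx_xmit_pkts() as chunks of
 * 32, 32, 32 and 4; if any chunk is only partially accepted because the
 * ring ran out of free descriptors, the loop stops and the number of
 * packets transmitted so far is returned.
 */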
342
343 static uint16_t
344 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
345                     uint16_t nb_pkts)
346 {
347         uint16_t nb_tx = 0;
348         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
349
350         while (nb_pkts) {
351                 uint16_t ret, num;
352
353                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
354                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
355                                                  num);
356                 nb_tx += ret;
357                 nb_pkts -= ret;
358                 if (ret < num)
359                         break;
360         }
361
362         return nb_tx;
363 }
364
365 static inline void
366 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
367                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
368                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
369                 __rte_unused uint64_t *mdata)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & RTE_MBUF_F_TX_VLAN)
387                 tx_offload_mask.vlan_tci |= ~0;
388
389         /* check if TCP segmentation required for this packet */
390         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
391                 /* implies IP cksum in IPv4 */
392                 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
393                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
394                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
395                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
396                 else
397                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
398                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
399                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
400
401                 tx_offload_mask.l2_len |= ~0;
402                 tx_offload_mask.l3_len |= ~0;
403                 tx_offload_mask.l4_len |= ~0;
404                 tx_offload_mask.tso_segsz |= ~0;
405                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
406                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
407         } else { /* no TSO, check if hardware checksum is needed */
408                 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
409                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
410                         tx_offload_mask.l2_len |= ~0;
411                         tx_offload_mask.l3_len |= ~0;
412                 }
413
414                 switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
415                 case RTE_MBUF_F_TX_UDP_CKSUM:
416                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
417                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
418                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
419                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case RTE_MBUF_F_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
427                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
428                         tx_offload_mask.l2_len |= ~0;
429                         tx_offload_mask.l3_len |= ~0;
430                         break;
431                 case RTE_MBUF_F_TX_SCTP_CKSUM:
432                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
433                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
434                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
435                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
436                         tx_offload_mask.l2_len |= ~0;
437                         tx_offload_mask.l3_len |= ~0;
438                         break;
439                 default:
440                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
441                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
442                         break;
443                 }
444         }
445
446         if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) {
447                 tx_offload_mask.outer_l2_len |= ~0;
448                 tx_offload_mask.outer_l3_len |= ~0;
449                 tx_offload_mask.l2_len |= ~0;
450                 seqnum_seed |= tx_offload.outer_l3_len
451                                << IXGBE_ADVTXD_OUTER_IPLEN;
452                 seqnum_seed |= tx_offload.l2_len
453                                << IXGBE_ADVTXD_TUNNEL_LEN;
454         }
455 #ifdef RTE_LIB_SECURITY
456         if (ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
457                 union ixgbe_crypto_tx_desc_md *md =
458                                 (union ixgbe_crypto_tx_desc_md *)mdata;
459                 seqnum_seed |=
460                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
461                 type_tucmd_mlhl |= md->enc ?
462                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
463                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
464                 type_tucmd_mlhl |=
465                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
466                 tx_offload_mask.sa_idx |= ~0;
467                 tx_offload_mask.sec_pad_len |= ~0;
468         }
469 #endif
470
471         txq->ctx_cache[ctx_idx].flags = ol_flags;
472         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
473                 tx_offload_mask.data[0] & tx_offload.data[0];
474         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
475                 tx_offload_mask.data[1] & tx_offload.data[1];
476         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
477
478         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
479         vlan_macip_lens = tx_offload.l3_len;
480         if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
481                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
482                                     IXGBE_ADVTXD_MACLEN_SHIFT);
483         else
484                 vlan_macip_lens |= (tx_offload.l2_len <<
485                                     IXGBE_ADVTXD_MACLEN_SHIFT);
486         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
487         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
488         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
489         ctx_txd->seqnum_seed     = seqnum_seed;
490 }
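/*
 * Packing example (illustrative, restating the assignments above): for
 * l2_len == 14, l3_len == 20 and vlan_tci == 100 with no outer IP
 * checksum offload, the context descriptor ends up with
 *
 *   vlan_macip_lens = 20 | (14 << IXGBE_ADVTXD_MACLEN_SHIFT) |
 *                     (100 << IXGBE_ADVTXD_VLAN_SHIFT);
 */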
491
492 /*
493  * Check which hardware context can be used. Use the existing match
494  * or create a new context descriptor.
495  */
496 static inline uint32_t
497 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
498                    union ixgbe_tx_offload tx_offload)
499 {
500         /* If match with the current used context */
501         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
502                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
503                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
504                      & tx_offload.data[0])) &&
505                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
506                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
507                      & tx_offload.data[1]))))
508                 return txq->ctx_curr;
509
510         /* Check whether it matches the other cached context */
511         txq->ctx_curr ^= 1;
512         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
513                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
514                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
515                      & tx_offload.data[0])) &&
516                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
517                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
518                      & tx_offload.data[1]))))
519                 return txq->ctx_curr;
520
521         /* No match: a new context descriptor must be built */
522         return IXGBE_CTX_NUM;
523 }
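/*
 * Usage sketch (mirrors the caller in ixgbe_xmit_pkts() below): the queue
 * caches IXGBE_CTX_NUM contexts, and a return value of IXGBE_CTX_NUM is
 * the "no match" signal that forces a fresh context descriptor, e.g.
 *
 *   ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
 *   new_ctx = (ctx == IXGBE_CTX_NUM);
 *   ctx = txq->ctx_curr;
 */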
524
525 static inline uint32_t
526 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
527 {
528         uint32_t tmp = 0;
529
530         if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) != RTE_MBUF_F_TX_L4_NO_CKSUM)
531                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
532         if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
534         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
535                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
536         return tmp;
537 }
538
539 static inline uint32_t
540 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
541 {
542         uint32_t cmdtype = 0;
543
544         if (ol_flags & RTE_MBUF_F_TX_VLAN)
545                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
546         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
548         if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
549                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
550         if (ol_flags & RTE_MBUF_F_TX_MACSEC)
551                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
552         return cmdtype;
553 }
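/*
 * Mapping example (illustrative): a packet carrying
 * RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM ends up with
 * IXGBE_ADVTXD_POPTS_IXSM | IXGBE_ADVTXD_POPTS_TXSM in olinfo_status and
 * no extra cmdtype bits, while adding RTE_MBUF_F_TX_VLAN also sets
 * IXGBE_ADVTXD_DCMD_VLE in cmd_type_len.
 */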
554
555 /* Default RS bit threshold values */
556 #ifndef DEFAULT_TX_RS_THRESH
557 #define DEFAULT_TX_RS_THRESH   32
558 #endif
559 #ifndef DEFAULT_TX_FREE_THRESH
560 #define DEFAULT_TX_FREE_THRESH 32
561 #endif
562
563 /* Reset transmit descriptors after they have been used */
564 static inline int
565 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
566 {
567         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
568         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
569         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
570         uint16_t nb_tx_desc = txq->nb_tx_desc;
571         uint16_t desc_to_clean_to;
572         uint16_t nb_tx_to_clean;
573         uint32_t status;
574
575         /* Determine the last descriptor needing to be cleaned */
576         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
577         if (desc_to_clean_to >= nb_tx_desc)
578                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
579
580         /* Check to make sure the last descriptor to clean is done */
581         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
582         status = txr[desc_to_clean_to].wb.status;
583         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
584                 PMD_TX_LOG(DEBUG,
585                            "TX descriptor %4u is not done "
586                            "(port=%d queue=%d)",
587                            desc_to_clean_to,
588                            txq->port_id, txq->queue_id);
589                 /* Failed to clean any descriptors, better luck next time */
590                 return -(1);
591         }
592
593         /* Figure out how many descriptors will be cleaned */
594         if (last_desc_cleaned > desc_to_clean_to)
595                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
596                                                         desc_to_clean_to);
597         else
598                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
599                                                 last_desc_cleaned);
600
601         PMD_TX_LOG(DEBUG,
602                    "Cleaning %4u TX descriptors: %4u to %4u "
603                    "(port=%d queue=%d)",
604                    nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
605                    txq->port_id, txq->queue_id);
606
607         /*
608          * The last descriptor to clean is done, so that means all the
609          * descriptors from the last descriptor that was cleaned
610          * up to the last descriptor with the RS bit set
611          * are done. Only reset the threshold descriptor.
612          */
613         txr[desc_to_clean_to].wb.status = 0;
614
615         /* Update the txq to reflect the last descriptor that was cleaned */
616         txq->last_desc_cleaned = desc_to_clean_to;
617         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
618
619         /* No Error */
620         return 0;
621 }
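/*
 * Worked example (a sketch assuming tx_rs_thresh == 32, a 512-entry ring
 * and single-segment packets, so last_id does not redirect the index):
 * with last_desc_cleaned == 95, desc_to_clean_to becomes 127; if that
 * descriptor reports DD, nb_tx_to_clean is 127 - 95 == 32.  In the wrap
 * case, last_desc_cleaned == 500 gives desc_to_clean_to == 20 and
 * nb_tx_to_clean == (512 - 500) + 20 == 32.
 */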
622
623 uint16_t
624 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
625                 uint16_t nb_pkts)
626 {
627         struct ixgbe_tx_queue *txq;
628         struct ixgbe_tx_entry *sw_ring;
629         struct ixgbe_tx_entry *txe, *txn;
630         volatile union ixgbe_adv_tx_desc *txr;
631         volatile union ixgbe_adv_tx_desc *txd, *txp;
632         struct rte_mbuf     *tx_pkt;
633         struct rte_mbuf     *m_seg;
634         uint64_t buf_dma_addr;
635         uint32_t olinfo_status;
636         uint32_t cmd_type_len;
637         uint32_t pkt_len;
638         uint16_t slen;
639         uint64_t ol_flags;
640         uint16_t tx_id;
641         uint16_t tx_last;
642         uint16_t nb_tx;
643         uint16_t nb_used;
644         uint64_t tx_ol_req;
645         uint32_t ctx = 0;
646         uint32_t new_ctx;
647         union ixgbe_tx_offload tx_offload;
648 #ifdef RTE_LIB_SECURITY
649         uint8_t use_ipsec;
650 #endif
651
652         tx_offload.data[0] = 0;
653         tx_offload.data[1] = 0;
654         txq = tx_queue;
655         sw_ring = txq->sw_ring;
656         txr     = txq->tx_ring;
657         tx_id   = txq->tx_tail;
658         txe = &sw_ring[tx_id];
659         txp = NULL;
660
661         /* Determine if the descriptor ring needs to be cleaned. */
662         if (txq->nb_tx_free < txq->tx_free_thresh)
663                 ixgbe_xmit_cleanup(txq);
664
665         rte_prefetch0(&txe->mbuf->pool);
666
667         /* TX loop */
668         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
669                 new_ctx = 0;
670                 tx_pkt = *tx_pkts++;
671                 pkt_len = tx_pkt->pkt_len;
672
673                 /*
674                  * Determine how many (if any) context descriptors
675                  * are needed for offload functionality.
676                  */
677                 ol_flags = tx_pkt->ol_flags;
678 #ifdef RTE_LIB_SECURITY
679                 use_ipsec = txq->using_ipsec && (ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
680 #endif
681
682                 /* If hardware offload required */
683                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
684                 if (tx_ol_req) {
685                         tx_offload.l2_len = tx_pkt->l2_len;
686                         tx_offload.l3_len = tx_pkt->l3_len;
687                         tx_offload.l4_len = tx_pkt->l4_len;
688                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
689                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
690                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
691                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
692 #ifdef RTE_LIB_SECURITY
693                         if (use_ipsec) {
694                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
695                                         (union ixgbe_crypto_tx_desc_md *)
696                                                 rte_security_dynfield(tx_pkt);
697                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
698                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
699                         }
700 #endif
701
702                         /* Build a new context descriptor or reuse the existing one. */
703                         ctx = what_advctx_update(txq, tx_ol_req,
704                                 tx_offload);
705                         /* Only allocate a context descriptor if required */
706                         new_ctx = (ctx == IXGBE_CTX_NUM);
707                         ctx = txq->ctx_curr;
708                 }
709
710                 /*
711                  * Keep track of how many descriptors are used this loop
712                  * This will always be the number of segments + the number of
713                  * Context descriptors required to transmit the packet
714                  */
715                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
716
717                 if (txp != NULL &&
718                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
719                         /* set RS on the previous packet in the burst */
720                         txp->read.cmd_type_len |=
721                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
722
723                 /*
724                  * The number of descriptors that must be allocated for a
725                  * packet is the number of segments of that packet, plus 1
726                  * Context Descriptor for the hardware offload, if any.
727                  * Determine the last TX descriptor to allocate in the TX ring
728                  * for the packet, starting from the current position (tx_id)
729                  * in the ring.
730                  */
731                 tx_last = (uint16_t) (tx_id + nb_used - 1);
732
733                 /* Circular ring */
734                 if (tx_last >= txq->nb_tx_desc)
735                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
736
737                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
738                            " tx_first=%u tx_last=%u",
739                            (unsigned) txq->port_id,
740                            (unsigned) txq->queue_id,
741                            (unsigned) pkt_len,
742                            (unsigned) tx_id,
743                            (unsigned) tx_last);
744
745                 /*
746                  * Make sure there are enough TX descriptors available to
747                  * transmit the entire packet.
748                  * nb_used better be less than or equal to txq->tx_rs_thresh
749                  */
750                 if (nb_used > txq->nb_tx_free) {
751                         PMD_TX_LOG(DEBUG,
752                                    "Not enough free TX descriptors "
753                                    "nb_used=%4u nb_free=%4u "
754                                    "(port=%d queue=%d)",
755                                    nb_used, txq->nb_tx_free,
756                                    txq->port_id, txq->queue_id);
757
758                         if (ixgbe_xmit_cleanup(txq) != 0) {
759                                 /* Could not clean any descriptors */
760                                 if (nb_tx == 0)
761                                         return 0;
762                                 goto end_of_tx;
763                         }
764
765                         /* nb_used better be <= txq->tx_rs_thresh */
766                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
767                                 PMD_TX_LOG(DEBUG,
768                                            "The number of descriptors needed to "
769                                            "transmit the packet exceeds the "
770                                            "RS bit threshold. This will impact "
771                                            "performance. "
772                                            "nb_used=%4u nb_free=%4u "
773                                            "tx_rs_thresh=%4u. "
774                                            "(port=%d queue=%d)",
775                                            nb_used, txq->nb_tx_free,
776                                            txq->tx_rs_thresh,
777                                            txq->port_id, txq->queue_id);
778                                 /*
779                                  * Loop here until there are enough TX
780                                  * descriptors or until the ring cannot be
781                                  * cleaned.
782                                  */
783                                 while (nb_used > txq->nb_tx_free) {
784                                         if (ixgbe_xmit_cleanup(txq) != 0) {
785                                                 /*
786                                                  * Could not clean any
787                                                  * descriptors
788                                                  */
789                                                 if (nb_tx == 0)
790                                                         return 0;
791                                                 goto end_of_tx;
792                                         }
793                                 }
794                         }
795                 }
796
797                 /*
798                  * By now there are enough free TX descriptors to transmit
799                  * the packet.
800                  */
801
802                 /*
803                  * Set common flags of all TX Data Descriptors.
804                  *
805                  * The following bits must be set in all Data Descriptors:
806                  *   - IXGBE_ADVTXD_DTYP_DATA
807                  *   - IXGBE_ADVTXD_DCMD_DEXT
808                  *
809                  * The following bits must be set in the first Data Descriptor
810                  * and are ignored in the other ones:
811                  *   - IXGBE_ADVTXD_DCMD_IFCS
812                  *   - IXGBE_ADVTXD_MAC_1588
813                  *   - IXGBE_ADVTXD_DCMD_VLE
814                  *
815                  * The following bits must only be set in the last Data
816                  * Descriptor:
817                  *   - IXGBE_TXD_CMD_EOP
818                  *
819                  * The following bits can be set in any Data Descriptor, but
820                  * are only set in the last Data Descriptor:
821                  *   - IXGBE_TXD_CMD_RS
822                  */
823                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
824                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
825
826 #ifdef RTE_LIBRTE_IEEE1588
827                 if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
828                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
829 #endif
830
831                 olinfo_status = 0;
832                 if (tx_ol_req) {
833
834                         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
835                                 /* when TSO is on, the paylen in the descriptor is
836                                  * the TCP payload len, not the full packet len */
837                                 pkt_len -= (tx_offload.l2_len +
838                                         tx_offload.l3_len + tx_offload.l4_len);
839                         }
840
841                         /*
842                          * Setup the TX Advanced Context Descriptor if required
843                          */
844                         if (new_ctx) {
845                                 volatile struct ixgbe_adv_tx_context_desc *
846                                     ctx_txd;
847
848                                 ctx_txd = (volatile struct
849                                     ixgbe_adv_tx_context_desc *)
850                                     &txr[tx_id];
851
852                                 txn = &sw_ring[txe->next_id];
853                                 rte_prefetch0(&txn->mbuf->pool);
854
855                                 if (txe->mbuf != NULL) {
856                                         rte_pktmbuf_free_seg(txe->mbuf);
857                                         txe->mbuf = NULL;
858                                 }
859
860                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
861                                         tx_offload,
862                                         rte_security_dynfield(tx_pkt));
863
864                                 txe->last_id = tx_last;
865                                 tx_id = txe->next_id;
866                                 txe = txn;
867                         }
868
869                         /*
870                          * Set up the TX Advanced Data Descriptor.
871                          * This path is taken whether the context
872                          * descriptor was newly built or reused.
873                          */
874                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
875                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
876                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
877                 }
878
879                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
880 #ifdef RTE_LIB_SECURITY
881                 if (use_ipsec)
882                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
883 #endif
884
885                 m_seg = tx_pkt;
886                 do {
887                         txd = &txr[tx_id];
888                         txn = &sw_ring[txe->next_id];
889                         rte_prefetch0(&txn->mbuf->pool);
890
891                         if (txe->mbuf != NULL)
892                                 rte_pktmbuf_free_seg(txe->mbuf);
893                         txe->mbuf = m_seg;
894
895                         /*
896                          * Set up Transmit Data Descriptor.
897                          */
898                         slen = m_seg->data_len;
899                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
900                         txd->read.buffer_addr =
901                                 rte_cpu_to_le_64(buf_dma_addr);
902                         txd->read.cmd_type_len =
903                                 rte_cpu_to_le_32(cmd_type_len | slen);
904                         txd->read.olinfo_status =
905                                 rte_cpu_to_le_32(olinfo_status);
906                         txe->last_id = tx_last;
907                         tx_id = txe->next_id;
908                         txe = txn;
909                         m_seg = m_seg->next;
910                 } while (m_seg != NULL);
911
912                 /*
913                  * The last packet data descriptor needs End Of Packet (EOP)
914                  */
915                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
916                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
917                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
918
919                 /* Set RS bit only on threshold packets' last descriptor */
920                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
921                         PMD_TX_LOG(DEBUG,
922                                    "Setting RS bit on TXD id="
923                                    "%4u (port=%d queue=%d)",
924                                    tx_last, txq->port_id, txq->queue_id);
925
926                         cmd_type_len |= IXGBE_TXD_CMD_RS;
927
928                         /* Update txq RS bit counters */
929                         txq->nb_tx_used = 0;
930                         txp = NULL;
931                 } else
932                         txp = txd;
933
934                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
935         }
936
937 end_of_tx:
938         /* set RS on last packet in the burst */
939         if (txp != NULL)
940                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
941
942         rte_wmb();
943
944         /*
945          * Set the Transmit Descriptor Tail (TDT)
946          */
947         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
948                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
949                    (unsigned) tx_id, (unsigned) nb_tx);
950         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
951         txq->tx_tail = tx_id;
952
953         return nb_tx;
954 }
955
956 /*********************************************************************
957  *
958  *  TX prep functions
959  *
960  **********************************************************************/
961 uint16_t
962 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
963 {
964         int i, ret;
965         uint64_t ol_flags;
966         struct rte_mbuf *m;
967         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
968
969         for (i = 0; i < nb_pkts; i++) {
970                 m = tx_pkts[i];
971                 ol_flags = m->ol_flags;
972
973                 /**
974                  * Check if packet meets requirements for number of segments
975                  *
976                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
977                  *       non-TSO
978                  */
979
980                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
981                         rte_errno = EINVAL;
982                         return i;
983                 }
984
985                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
986                         rte_errno = ENOTSUP;
987                         return i;
988                 }
989
990                 /* check the size of packet */
991                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
992                         rte_errno = EINVAL;
993                         return i;
994                 }
995
996 #ifdef RTE_ETHDEV_DEBUG_TX
997                 ret = rte_validate_tx_offload(m);
998                 if (ret != 0) {
999                         rte_errno = -ret;
1000                         return i;
1001                 }
1002 #endif
1003                 ret = rte_net_intel_cksum_prepare(m);
1004                 if (ret != 0) {
1005                         rte_errno = -ret;
1006                         return i;
1007                 }
1008         }
1009
1010         return i;
1011 }
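/*
 * Application-side usage sketch (an illustrative assumption, not part of
 * the driver): rte_eth_tx_prepare() dispatches to the PMD's prepare
 * callback, so a caller would typically validate a burst here before
 * passing it to rte_eth_tx_burst().  The helper name below is hypothetical.
 */
static __rte_unused uint16_t
ixgbe_example_prepare_and_send(uint16_t port_id, uint16_t queue_id,
                               struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        /* Run the per-packet offload/segment checks implemented above. */
        uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts,
                                              nb_pkts);

        /*
         * Packets from pkts[nb_prep] onward (if any) failed validation and
         * rte_errno explains why; transmit only the ones that passed.
         */
        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}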
1012
1013 /*********************************************************************
1014  *
1015  *  RX functions
1016  *
1017  **********************************************************************/
1018
1019 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1020 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1021 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1022 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1023 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1024 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1028 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1029 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1030 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1031 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1032 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1036 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1040 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1044 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1052
1053 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1076
1077 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1100
1101 /**
1102  * Two different tables are used for normal packets and for tunnel
1103  * packets, to save space.
1104  */
1105 const uint32_t
1106         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1107         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1108         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1109                 RTE_PTYPE_L3_IPV4,
1110         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1112         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1114         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1116         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4_EXT,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1124         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV6,
1126         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1128         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1130         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1132         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6_EXT,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1140         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1142                 RTE_PTYPE_INNER_L3_IPV6,
1143         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1144                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1145                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1146         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1147                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1148         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1149         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1150                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1151                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1152         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1153                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1154                 RTE_PTYPE_INNER_L3_IPV6,
1155         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1156                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1157                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1158         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1159                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1160                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1161         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1162                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1163                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1164         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1165                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1166                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1167         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1168                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1169                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1170         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1171                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1172                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1173         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1174                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1175                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1176         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1178                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1179         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1181                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1182         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1183                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1184                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1185         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1186                 RTE_PTYPE_L2_ETHER |
1187                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1188                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1189 };
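/*
 * Lookup example (illustrative): the RX path indexes this table with the
 * packet-type bits taken from the RX descriptor, so
 * ptype_table[IXGBE_PACKET_TYPE_IPV4_TCP] yields
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP for the mbuf
 * packet_type field.
 */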
1190
1191 const uint32_t
1192         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1193         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1194                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                 RTE_PTYPE_INNER_L2_ETHER,
1196         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1197                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1199         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1200                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1201                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1202         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1203                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1205         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1206                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1208         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1214         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1215                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1216                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1217                 RTE_PTYPE_INNER_L4_TCP,
1218         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1219                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1220                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1221                 RTE_PTYPE_INNER_L4_TCP,
1222         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1223                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1224                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1225         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1226                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1228                 RTE_PTYPE_INNER_L4_TCP,
1229         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1230                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1231                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1232                 RTE_PTYPE_INNER_L3_IPV4,
1233         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1234                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1236                 RTE_PTYPE_INNER_L4_UDP,
1237         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1238                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1240                 RTE_PTYPE_INNER_L4_UDP,
1241         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1242                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1243                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1244                 RTE_PTYPE_INNER_L4_SCTP,
1245         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1246                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1248         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1249                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1250                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1251                 RTE_PTYPE_INNER_L4_UDP,
1252         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1253                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1254                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1255                 RTE_PTYPE_INNER_L4_SCTP,
1256         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1257                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1258                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1259                 RTE_PTYPE_INNER_L3_IPV4,
1260         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1261                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1262                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1263                 RTE_PTYPE_INNER_L4_SCTP,
1264         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1265                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1266                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1267                 RTE_PTYPE_INNER_L4_SCTP,
1268         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1269                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1270                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1271                 RTE_PTYPE_INNER_L4_TCP,
1272         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1273                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1274                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1275                 RTE_PTYPE_INNER_L4_UDP,
1276
1277         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1278                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1280         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1281                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1282                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1283                 RTE_PTYPE_INNER_L3_IPV4,
1284         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1285                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1286                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1287                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1288         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1289                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1290                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1291                 RTE_PTYPE_INNER_L3_IPV6,
1292         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1293                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1294                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1295                 RTE_PTYPE_INNER_L3_IPV4,
1296         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1297                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1298                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1299                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1300         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1301                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1302                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1303                 RTE_PTYPE_INNER_L3_IPV4,
1304         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1305                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1306                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1307                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1308         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1309                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1310                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1311                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1312         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1313                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1314                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1315                 RTE_PTYPE_INNER_L3_IPV4,
1316         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1317                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1318                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1319                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1320         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1321                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1322                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1323                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1324         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1325                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1326                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1327                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1328         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1329                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1330                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1331                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1332         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1333                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1334                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1335                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1336         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1337                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1338                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1339                 RTE_PTYPE_INNER_L3_IPV4,
1340         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1341                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1342                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1343                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1344         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1345                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1346                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1347                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1348         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1349                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1350                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1351                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1352         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1353                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1354                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1355                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1356         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1357                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1358                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1359                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1360         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1361                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1362                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1363                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1364         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1365                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1366                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1367                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1368 };
1369
1370 static int
1371 ixgbe_monitor_callback(const uint64_t value,
1372                 const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
1373 {
1374         const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1375         /*
1376          * We expect the DD bit to be set to 1 if this descriptor was already
1377          * written back; in that case return -1 so the wait is aborted.
1378          */
1379         return (value & m) == m ? -1 : 0;
1380 }
1381
1382 int
1383 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1384 {
1385         volatile union ixgbe_adv_rx_desc *rxdp;
1386         struct ixgbe_rx_queue *rxq = rx_queue;
1387         uint16_t desc;
1388
1389         desc = rxq->rx_tail;
1390         rxdp = &rxq->rx_ring[desc];
1391         /* watch for changes in status bit */
1392         pmc->addr = &rxdp->wb.upper.status_error;
1393
1394         /* comparison callback */
1395         pmc->fn = ixgbe_monitor_callback;
1396
1397         /* the registers are 32-bit */
1398         pmc->size = sizeof(uint32_t);
1399
1400         return 0;
1401 }
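
/*
 * Usage sketch (illustrative only, not part of this driver): the condition
 * filled in above is consumed by applications through the generic ethdev and
 * power-intrinsics APIs, assuming rte_eth_get_monitor_addr() and
 * rte_power_monitor() keep their current signatures:
 *
 *      #include <rte_ethdev.h>
 *      #include <rte_power_intrinsics.h>
 *      #include <rte_cycles.h>
 *
 *      static void
 *      wait_for_rx(uint16_t port_id, uint16_t queue_id)
 *      {
 *              struct rte_power_monitor_cond pmc;
 *
 *              // for ixgbe ports this ends up in ixgbe_get_monitor_addr()
 *              if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
 *                      // sleep until the DD bit flips or the TSC deadline passes
 *                      rte_power_monitor(&pmc, rte_get_tsc_cycles() + 1000000);
 *      }
 */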
1402
1403 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1404 static inline uint32_t
1405 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1406 {
1407
1408         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1409                 return RTE_PTYPE_UNKNOWN;
1410
1411         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1412
1413         /* For tunnel packets */
1414         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1415                 /* Remove the tunnel bit to save table space. */
1416                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1417                 return ptype_table_tn[pkt_info];
1418         }
1419
1420         /**
1421          * For x550, if it's not a tunnel packet,
1422          * the tunnel type bit should be 0,
1423          * so the 82599 mask can be reused.
1424          */
1425         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1426
1427         return ptype_table[pkt_info];
1428 }
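
/*
 * Usage sketch (illustrative only, not part of this driver): the value
 * returned above lands in mbuf->packet_type and can be decoded with the
 * rte_mbuf_ptype.h helpers, assuming those helpers keep their current names:
 *
 *      #include <stdio.h>
 *      #include <rte_mbuf.h>
 *      #include <rte_mbuf_ptype.h>
 *
 *      static void
 *      show_ptype(const struct rte_mbuf *m)
 *      {
 *              char name[256];
 *
 *              // non-zero for the tunnelled (GRE/VXLAN) entries of the tables above
 *              if (RTE_ETH_IS_TUNNEL_PKT(m->packet_type))
 *                      printf("tunnelled packet\n");
 *              rte_get_ptype_name(m->packet_type, name, sizeof(name));
 *              printf("ptype: %s\n", name);
 *      }
 */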
1429
1430 static inline uint64_t
1431 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1432 {
1433         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1434                 0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
1435                 0, RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH,
1436                 RTE_MBUF_F_RX_RSS_HASH, 0, 0, 0,
1437                 0, 0, 0,  RTE_MBUF_F_RX_FDIR,
1438         };
1439 #ifdef RTE_LIBRTE_IEEE1588
1440         static uint64_t ip_pkt_etqf_map[8] = {
1441                 0, 0, 0, RTE_MBUF_F_RX_IEEE1588_PTP,
1442                 0, 0, 0, 0,
1443         };
1444
1445         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1446                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1447                                 ip_rss_types_map[pkt_info & 0XF];
1448         else
1449                 return ip_rss_types_map[pkt_info & 0XF];
1450 #else
1451         return ip_rss_types_map[pkt_info & 0XF];
1452 #endif
1453 }
1454
1455 static inline uint64_t
1456 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1457 {
1458         uint64_t pkt_flags;
1459
1460         /*
1461          * Only check whether a VLAN tag is present.
1462          * Whether the NIC computed the L3/L4 Rx checksum is not checked
1463          * here; that can be found from the rte_eth_rxmode.offloads flags.
1464          */
1465         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1466
1467 #ifdef RTE_LIBRTE_IEEE1588
1468         if (rx_status & IXGBE_RXD_STAT_TMST)
1469                 pkt_flags = pkt_flags | RTE_MBUF_F_RX_IEEE1588_TMST;
1470 #endif
1471         return pkt_flags;
1472 }
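
/*
 * Usage sketch (illustrative only, not part of this driver): vlan_flags comes
 * from the queue's Rx offload configuration, so mbuf->vlan_tci should only be
 * trusted when the VLAN flag set here is present in ol_flags:
 *
 *      #include <rte_mbuf.h>
 *
 *      static uint16_t
 *      rx_vlan_id(const struct rte_mbuf *m)
 *      {
 *              // vlan_tci is only meaningful when the VLAN flag is reported
 *              if (m->ol_flags & RTE_MBUF_F_RX_VLAN)
 *                      return m->vlan_tci & 0x0fff; // low 12 bits carry the VLAN ID
 *              return 0;
 *      }
 */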
1473
1474 static inline uint64_t
1475 rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1476                            uint8_t rx_udp_csum_zero_err)
1477 {
1478         uint64_t pkt_flags;
1479
1480         /*
1481          * Bit 31: IPE, IPv4 checksum error
1482          * Bit 30: L4I, L4 integrity error
1483          */
1484         static uint64_t error_to_pkt_flags_map[4] = {
1485                 RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
1486                 RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
1487                 RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
1488                 RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD
1489         };
1490         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1491                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1492
1493         /* Mask out the bad UDP checksum error if the hardware has the UDP
1494          * zero-checksum issue; the application then has to recompute the
1495          * checksum itself if needed.
1496          */
1497         if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1498             (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1499             rx_udp_csum_zero_err)
1500                 pkt_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_BAD;
1501
1502         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1503             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1504                 pkt_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
1505         }
1506
1507 #ifdef RTE_LIB_SECURITY
1508         if (rx_status & IXGBE_RXD_STAT_SECP) {
1509                 pkt_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD;
1510                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1511                         pkt_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED;
1512         }
1513 #endif
1514
1515         return pkt_flags;
1516 }
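
/*
 * Usage sketch (illustrative only, not part of this driver): because the UDP
 * zero-checksum workaround above can clear the BAD flag, applications should
 * treat an unknown L4 checksum state as "verify in software":
 *
 *      #include <rte_mbuf.h>
 *
 *      // 1: HW validated the L4 checksum, 0: HW flagged it bad,
 *      // -1: state unknown, verify in software if the flow needs it.
 *      static int
 *      l4_csum_state(const struct rte_mbuf *m)
 *      {
 *              switch (m->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) {
 *              case RTE_MBUF_F_RX_L4_CKSUM_GOOD:
 *                      return 1;
 *              case RTE_MBUF_F_RX_L4_CKSUM_BAD:
 *                      return 0;
 *              default:
 *                      return -1;
 *              }
 *      }
 */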
1517
1518 /*
1519  * LOOK_AHEAD defines how many desc statuses to check beyond the
1520  * current descriptor.
1521  * It must be a compile-time #define for optimal performance.
1522  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1523  * function only works with LOOK_AHEAD=8.
1524  */
1525 #define LOOK_AHEAD 8
1526 #if (LOOK_AHEAD != 8)
1527 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1528 #endif
1529 static inline int
1530 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1531 {
1532         volatile union ixgbe_adv_rx_desc *rxdp;
1533         struct ixgbe_rx_entry *rxep;
1534         struct rte_mbuf *mb;
1535         uint16_t pkt_len;
1536         uint64_t pkt_flags;
1537         int nb_dd;
1538         uint32_t s[LOOK_AHEAD];
1539         uint32_t pkt_info[LOOK_AHEAD];
1540         int i, j, nb_rx = 0;
1541         uint32_t status;
1542         uint64_t vlan_flags = rxq->vlan_flags;
1543
1544         /* get references to current descriptor and S/W ring entry */
1545         rxdp = &rxq->rx_ring[rxq->rx_tail];
1546         rxep = &rxq->sw_ring[rxq->rx_tail];
1547
1548         status = rxdp->wb.upper.status_error;
1549         /* check to make sure there is at least 1 packet to receive */
1550         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1551                 return 0;
1552
1553         /*
1554          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1555          * reference packets that are ready to be received.
1556          */
1557         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1558              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1559                 /* Read desc statuses; the barrier below orders later field reads */
1560                 for (j = 0; j < LOOK_AHEAD; j++)
1561                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1562
1563                 rte_smp_rmb();
1564
1565                 /* Count how many contiguous descriptors have the DD bit set */
1566                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1567                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1568                         ;
1569
1570                 for (j = 0; j < nb_dd; j++)
1571                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1572                                                        lo_dword.data);
1573
1574                 nb_rx += nb_dd;
1575
1576                 /* Translate descriptor info to mbuf format */
1577                 for (j = 0; j < nb_dd; ++j) {
1578                         mb = rxep[j].mbuf;
1579                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1580                                   rxq->crc_len;
1581                         mb->data_len = pkt_len;
1582                         mb->pkt_len = pkt_len;
1583                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1584
1585                         /* convert descriptor fields to rte mbuf flags */
1586                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1587                                 vlan_flags);
1588                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
1589                                         (uint16_t)pkt_info[j],
1590                                         rxq->rx_udp_csum_zero_err);
1591                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1592                                         ((uint16_t)pkt_info[j]);
1593                         mb->ol_flags = pkt_flags;
1594                         mb->packet_type =
1595                                 ixgbe_rxd_pkt_info_to_pkt_type
1596                                         (pkt_info[j], rxq->pkt_type_mask);
1597
1598                         if (likely(pkt_flags & RTE_MBUF_F_RX_RSS_HASH))
1599                                 mb->hash.rss = rte_le_to_cpu_32(
1600                                     rxdp[j].wb.lower.hi_dword.rss);
1601                         else if (pkt_flags & RTE_MBUF_F_RX_FDIR) {
1602                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1603                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1604                                     IXGBE_ATR_HASH_MASK;
1605                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1606                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1607                         }
1608                 }
1609
1610                 /* Move mbuf pointers from the S/W ring to the stage */
1611                 for (j = 0; j < LOOK_AHEAD; ++j) {
1612                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1613                 }
1614
1615                 /* stop scanning if fewer than LOOK_AHEAD descriptors were done */
1616                 if (nb_dd != LOOK_AHEAD)
1617                         break;
1618         }
1619
1620         /* clear software ring entries so we can cleanup correctly */
1621         for (i = 0; i < nb_rx; ++i) {
1622                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1623         }
1624
1625
1626         return nb_rx;
1627 }
1628
1629 static inline int
1630 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1631 {
1632         volatile union ixgbe_adv_rx_desc *rxdp;
1633         struct ixgbe_rx_entry *rxep;
1634         struct rte_mbuf *mb;
1635         uint16_t alloc_idx;
1636         __le64 dma_addr;
1637         int diag, i;
1638
1639         /* allocate buffers in bulk directly into the S/W ring */
1640         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1641         rxep = &rxq->sw_ring[alloc_idx];
1642         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1643                                     rxq->rx_free_thresh);
1644         if (unlikely(diag != 0))
1645                 return -ENOMEM;
1646
1647         rxdp = &rxq->rx_ring[alloc_idx];
1648         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1649                 /* populate the static rte mbuf fields */
1650                 mb = rxep[i].mbuf;
1651                 if (reset_mbuf) {
1652                         mb->port = rxq->port_id;
1653                 }
1654
1655                 rte_mbuf_refcnt_set(mb, 1);
1656                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1657
1658                 /* populate the descriptors */
1659                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1660                 rxdp[i].read.hdr_addr = 0;
1661                 rxdp[i].read.pkt_addr = dma_addr;
1662         }
1663
1664         /* update state of internal queue structure */
1665         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1666         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1667                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1668
1669         /* no errors */
1670         return 0;
1671 }
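
/*
 * Usage sketch (illustrative only, not part of this driver): the bulk refill
 * above pulls rx_free_thresh mbufs per rte_mempool_get_bulk() call, so the Rx
 * mempool should be sized for the ring depth plus some burst headroom; the
 * numbers below are arbitrary example values:
 *
 *      #include <rte_mbuf.h>
 *      #include <rte_lcore.h>
 *
 *      static struct rte_mempool *
 *      make_rx_pool(void)
 *      {
 *              return rte_pktmbuf_pool_create("rx_pool",
 *                              8192,   // nb mbufs: ring size plus in-flight bursts
 *                              256,    // per-lcore cache size
 *                              0, RTE_MBUF_DEFAULT_BUF_SIZE,
 *                              rte_socket_id());
 *      }
 */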
1672
1673 static inline uint16_t
1674 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1675                          uint16_t nb_pkts)
1676 {
1677         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1678         int i;
1679
1680         /* how many packets are ready to return? */
1681         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1682
1683         /* copy mbuf pointers to the application's packet list */
1684         for (i = 0; i < nb_pkts; ++i)
1685                 rx_pkts[i] = stage[i];
1686
1687         /* update internal queue state */
1688         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1689         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1690
1691         return nb_pkts;
1692 }
1693
1694 static inline uint16_t
1695 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1696              uint16_t nb_pkts)
1697 {
1698         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1699         uint16_t nb_rx = 0;
1700
1701         /* Any previously received packets will be returned from the Rx stage */
1702         if (rxq->rx_nb_avail)
1703                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1704
1705         /* Scan the H/W ring for packets to receive */
1706         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1707
1708         /* update internal queue state */
1709         rxq->rx_next_avail = 0;
1710         rxq->rx_nb_avail = nb_rx;
1711         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1712
1713         /* if required, allocate new buffers to replenish descriptors */
1714         if (rxq->rx_tail > rxq->rx_free_trigger) {
1715                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1716
1717                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1718                         int i, j;
1719
1720                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1721                                    "queue_id=%u", (unsigned) rxq->port_id,
1722                                    (unsigned) rxq->queue_id);
1723
1724                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1725                                 rxq->rx_free_thresh;
1726
1727                         /*
1728                          * Need to rewind any previous receives if we cannot
1729                          * allocate new buffers to replenish the old ones.
1730                          */
1731                         rxq->rx_nb_avail = 0;
1732                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1733                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1734                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1735
1736                         return 0;
1737                 }
1738
1739                 /* update tail pointer */
1740                 rte_wmb();
1741                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1742                                             cur_free_trigger);
1743         }
1744
1745         if (rxq->rx_tail >= rxq->nb_rx_desc)
1746                 rxq->rx_tail = 0;
1747
1748         /* received any packets this loop? */
1749         if (rxq->rx_nb_avail)
1750                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1751
1752         return 0;
1753 }
1754
1755 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1756 uint16_t
1757 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1758                            uint16_t nb_pkts)
1759 {
1760         uint16_t nb_rx;
1761
1762         if (unlikely(nb_pkts == 0))
1763                 return 0;
1764
1765         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1766                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1767
1768         /* request is relatively large, chunk it up */
1769         nb_rx = 0;
1770         while (nb_pkts) {
1771                 uint16_t ret, n;
1772
1773                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1774                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1775                 nb_rx = (uint16_t)(nb_rx + ret);
1776                 nb_pkts = (uint16_t)(nb_pkts - ret);
1777                 if (ret < n)
1778                         break;
1779         }
1780
1781         return nb_rx;
1782 }
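
/*
 * Usage sketch (illustrative only, not part of this driver): from the
 * application's point of view the chunking above is invisible; with
 * RTE_PMD_IXGBE_RX_MAX_BURST currently defined as 32 in ixgbe_rxtx.h, a
 * request for 100 mbufs is served as internal chunks of 32 + 32 + 32 + 4,
 * stopping early as soon as a chunk comes back short:
 *
 *      #include <rte_ethdev.h>
 *
 *      struct rte_mbuf *pkts[100];
 *      // port_id and queue_id are assumed to identify a started ixgbe port
 *      uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, 100);
 */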
1783
1784 uint16_t
1785 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1786                 uint16_t nb_pkts)
1787 {
1788         struct ixgbe_rx_queue *rxq;
1789         volatile union ixgbe_adv_rx_desc *rx_ring;
1790         volatile union ixgbe_adv_rx_desc *rxdp;
1791         struct ixgbe_rx_entry *sw_ring;
1792         struct ixgbe_rx_entry *rxe;
1793         struct rte_mbuf *rxm;
1794         struct rte_mbuf *nmb;
1795         union ixgbe_adv_rx_desc rxd;
1796         uint64_t dma_addr;
1797         uint32_t staterr;
1798         uint32_t pkt_info;
1799         uint16_t pkt_len;
1800         uint16_t rx_id;
1801         uint16_t nb_rx;
1802         uint16_t nb_hold;
1803         uint64_t pkt_flags;
1804         uint64_t vlan_flags;
1805
1806         nb_rx = 0;
1807         nb_hold = 0;
1808         rxq = rx_queue;
1809         rx_id = rxq->rx_tail;
1810         rx_ring = rxq->rx_ring;
1811         sw_ring = rxq->sw_ring;
1812         vlan_flags = rxq->vlan_flags;
1813         while (nb_rx < nb_pkts) {
1814                 /*
1815                  * The order of operations here is important as the DD status
1816                  * bit must not be read after any other descriptor fields.
1817                  * rx_ring and rxdp are pointing to volatile data so the order
1818                  * of accesses cannot be reordered by the compiler. If they were
1819                  * not volatile, they could be reordered which could lead to
1820                  * using invalid descriptor fields when read from rxd.
1821                  */
1822                 rxdp = &rx_ring[rx_id];
1823                 staterr = rxdp->wb.upper.status_error;
1824                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1825                         break;
1826                 rxd = *rxdp;
1827
1828                 /*
1829                  * End of packet.
1830                  *
1831                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1832                  * is likely to be invalid and to be dropped by the various
1833                  * validation checks performed by the network stack.
1834                  *
1835                  * Allocate a new mbuf to replenish the RX ring descriptor.
1836                  * If the allocation fails:
1837                  *    - arrange for that RX descriptor to be the first one
1838                  *      being parsed the next time the receive function is
1839                  *      invoked [on the same queue].
1840                  *
1841                  *    - Stop parsing the RX ring and return immediately.
1842                  *
1843                  * This policy does not drop the packet received in the RX
1844                  * descriptor for which the allocation of a new mbuf failed.
1845                  * Thus, it allows that packet to be retrieved later, once
1846                  * mbufs have been freed in the meantime.
1847                  * As a side effect, holding RX descriptors instead of
1848                  * systematically giving them back to the NIC may lead to
1849                  * RX ring exhaustion situations.
1850                  * However, the NIC can gracefully prevent such situations
1851                  * from happening by sending specific "back-pressure" flow
1852                  * control frames to its peer(s).
1853                  */
1854                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1855                            "ext_err_stat=0x%08x pkt_len=%u",
1856                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1857                            (unsigned) rx_id, (unsigned) staterr,
1858                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1859
1860                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1861                 if (nmb == NULL) {
1862                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1863                                    "queue_id=%u", (unsigned) rxq->port_id,
1864                                    (unsigned) rxq->queue_id);
1865                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1866                         break;
1867                 }
1868
1869                 nb_hold++;
1870                 rxe = &sw_ring[rx_id];
1871                 rx_id++;
1872                 if (rx_id == rxq->nb_rx_desc)
1873                         rx_id = 0;
1874
1875                 /* Prefetch next mbuf while processing current one. */
1876                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1877
1878                 /*
1879                  * When the next RX descriptor is on a cache-line boundary,
1880                  * prefetch the next 4 RX descriptors and the next 8 pointers
1881                  * to mbufs.
1882                  */
1883                 if ((rx_id & 0x3) == 0) {
1884                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1885                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1886                 }
1887
1888                 rxm = rxe->mbuf;
1889                 rxe->mbuf = nmb;
1890                 dma_addr =
1891                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1892                 rxdp->read.hdr_addr = 0;
1893                 rxdp->read.pkt_addr = dma_addr;
1894
1895                 /*
1896                  * Initialize the returned mbuf.
1897                  * 1) setup generic mbuf fields:
1898                  *    - number of segments,
1899                  *    - next segment,
1900                  *    - packet length,
1901                  *    - RX port identifier.
1902                  * 2) integrate hardware offload data, if any:
1903                  *    - RSS flag & hash,
1904                  *    - IP checksum flag,
1905                  *    - VLAN TCI, if any,
1906                  *    - error flags.
1907                  */
1908                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1909                                       rxq->crc_len);
1910                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1911                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1912                 rxm->nb_segs = 1;
1913                 rxm->next = NULL;
1914                 rxm->pkt_len = pkt_len;
1915                 rxm->data_len = pkt_len;
1916                 rxm->port = rxq->port_id;
1917
1918                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1919                 /* Only valid if RTE_MBUF_F_RX_VLAN set in pkt_flags */
1920                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1921
1922                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1923                 pkt_flags = pkt_flags |
1924                         rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
1925                                                    rxq->rx_udp_csum_zero_err);
1926                 pkt_flags = pkt_flags |
1927                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1928                 rxm->ol_flags = pkt_flags;
1929                 rxm->packet_type =
1930                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1931                                                        rxq->pkt_type_mask);
1932
1933                 if (likely(pkt_flags & RTE_MBUF_F_RX_RSS_HASH))
1934                         rxm->hash.rss = rte_le_to_cpu_32(
1935                                                 rxd.wb.lower.hi_dword.rss);
1936                 else if (pkt_flags & RTE_MBUF_F_RX_FDIR) {
1937                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1938                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1939                                         IXGBE_ATR_HASH_MASK;
1940                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1941                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1942                 }
1943                 /*
1944                  * Store the mbuf address into the next entry of the array
1945                  * of returned packets.
1946                  */
1947                 rx_pkts[nb_rx++] = rxm;
1948         }
1949         rxq->rx_tail = rx_id;
1950
1951         /*
1952          * If the number of free RX descriptors is greater than the RX free
1953          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1954          * register.
1955          * Update the RDT with the value of the last processed RX descriptor
1956          * minus 1, to guarantee that the RDT register is never equal to the
1957          * RDH register, which creates a "full" ring situation from the
1958          * hardware point of view...
1959          */
1960         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1961         if (nb_hold > rxq->rx_free_thresh) {
1962                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1963                            "nb_hold=%u nb_rx=%u",
1964                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1965                            (unsigned) rx_id, (unsigned) nb_hold,
1966                            (unsigned) nb_rx);
1967                 rx_id = (uint16_t) ((rx_id == 0) ?
1968                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1969                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1970                 nb_hold = 0;
1971         }
1972         rxq->nb_rx_hold = nb_hold;
1973         return nb_rx;
1974 }
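
/*
 * Usage sketch (illustrative only, not part of this driver): a typical poll
 * loop consuming the offload data filled in above; dispatch() and nb_workers
 * are hypothetical application-side names:
 *
 *      #include <rte_ethdev.h>
 *      #include <rte_mbuf.h>
 *
 *      struct rte_mbuf *pkts[32];
 *      uint16_t i, nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 *      for (i = 0; i < nb; i++) {
 *              // hash.rss is only valid when the RSS flag is set
 *              if (pkts[i]->ol_flags & RTE_MBUF_F_RX_RSS_HASH)
 *                      dispatch(pkts[i]->hash.rss % nb_workers);
 *              rte_pktmbuf_free(pkts[i]);
 *      }
 */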
1975
1976 /**
1977  * Return the RSC segment count of a descriptor (non-zero for an RSC one).
1978  */
1979 static inline uint32_t
1980 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1981 {
1982         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1983                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1984 }
1985
1986 /**
1987  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1988  *
1989  * Fill the following info in the HEAD buffer of the Rx cluster:
1990  *    - RX port identifier
1991  *    - hardware offload data, if any:
1992  *      - RSS flag & hash
1993  *      - IP checksum flag
1994  *      - VLAN TCI, if any
1995  *      - error flags
1996  * @head HEAD of the packet cluster
1997  * @desc HW descriptor to get data from
1998  * @rxq Pointer to the Rx queue
1999  */
2000 static inline void
2001 ixgbe_fill_cluster_head_buf(
2002         struct rte_mbuf *head,
2003         union ixgbe_adv_rx_desc *desc,
2004         struct ixgbe_rx_queue *rxq,
2005         uint32_t staterr)
2006 {
2007         uint32_t pkt_info;
2008         uint64_t pkt_flags;
2009
2010         head->port = rxq->port_id;
2011
2012         /* The vlan_tci field is only valid when RTE_MBUF_F_RX_VLAN is
2013          * set in the pkt_flags field.
2014          */
2015         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2016         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2017         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2018         pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2019                                                 rxq->rx_udp_csum_zero_err);
2020         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2021         head->ol_flags = pkt_flags;
2022         head->packet_type =
2023                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2024
2025         if (likely(pkt_flags & RTE_MBUF_F_RX_RSS_HASH))
2026                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2027         else if (pkt_flags & RTE_MBUF_F_RX_FDIR) {
2028                 head->hash.fdir.hash =
2029                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2030                                                           & IXGBE_ATR_HASH_MASK;
2031                 head->hash.fdir.id =
2032                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2033         }
2034 }
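
/*
 * Usage sketch (illustrative only, not part of this driver): when the flow
 * director flag is set above, the 16-bit hash and report identifier can be
 * read back from the head mbuf:
 *
 *      #include <stdio.h>
 *      #include <rte_mbuf.h>
 *
 *      static void
 *      show_fdir(const struct rte_mbuf *m)
 *      {
 *              if (m->ol_flags & RTE_MBUF_F_RX_FDIR) {
 *                      uint16_t hash = m->hash.fdir.hash; // ATR/signature hash
 *                      uint16_t id = m->hash.fdir.id;     // reported identifier
 *                      printf("fdir hash=0x%04x id=%u\n", hash, id);
 *              }
 *      }
 */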
2035
2036 /**
2037  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2038  *
2039  * @rx_queue Rx queue handle
2040  * @rx_pkts table of received packets
2041  * @nb_pkts size of rx_pkts table
2042  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2043  *
2044  * Handles the Rx HW ring completions when the RSC feature is configured. Uses
2045  * an additional ring of ixgbe_rsc_entry's that holds the relevant RSC info.
2046  *
2047  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2048  * 1) When non-EOP RSC completion arrives:
2049  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2050  *       segment's data length.
2051  *    b) Set the "next" pointer of the current segment to point to the segment
2052  *       at the NEXTP index.
2053  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2054  *       in the sw_rsc_ring.
2055  * 2) When EOP arrives we just update the cluster's total length and offload
2056  *    flags and deliver the cluster up to the upper layers. In our case - put it
2057  *    in the rx_pkts table.
2058  *
2059  * Returns the number of received packets/clusters (according to the "bulk
2060  * receive" interface).
2061  */
2062 static inline uint16_t
2063 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2064                     bool bulk_alloc)
2065 {
2066         struct ixgbe_rx_queue *rxq = rx_queue;
2067         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2068         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2069         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2070         uint16_t rx_id = rxq->rx_tail;
2071         uint16_t nb_rx = 0;
2072         uint16_t nb_hold = rxq->nb_rx_hold;
2073         uint16_t prev_id = rxq->rx_tail;
2074
2075         while (nb_rx < nb_pkts) {
2076                 bool eop;
2077                 struct ixgbe_rx_entry *rxe;
2078                 struct ixgbe_scattered_rx_entry *sc_entry;
2079                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2080                 struct ixgbe_rx_entry *next_rxe = NULL;
2081                 struct rte_mbuf *first_seg;
2082                 struct rte_mbuf *rxm;
2083                 struct rte_mbuf *nmb = NULL;
2084                 union ixgbe_adv_rx_desc rxd;
2085                 uint16_t data_len;
2086                 uint16_t next_id;
2087                 volatile union ixgbe_adv_rx_desc *rxdp;
2088                 uint32_t staterr;
2089
2090 next_desc:
2091                 /*
2092                  * The code in this whole file uses the volatile pointer to
2093                  * ensure the read ordering of the status and the rest of the
2094                  * descriptor fields (on the compiler level only!!!). This is so
2095                  * UGLY - why not just use a compiler barrier instead? DPDK
2096                  * even has rte_compiler_barrier() for that.
2097                  *
2098                  * But most importantly this is just wrong because it doesn't
2099                  * ensure memory ordering in the general case at all. For
2100                  * instance, DPDK is supposed to work on Power CPUs where
2101                  * a compiler barrier may just not be enough!
2102                  *
2103                  * I tried to write only this function properly to have a
2104                  * starting point (as a part of an LRO/RSC series) but the
2105                  * compiler cursed at me when I tried to cast away the
2106                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2107                  * keeping it the way it is for now.
2108                  *
2109                  * The code in this file is broken in so many other places and
2110                  * will just not work on a big-endian CPU anyway; therefore the
2111                  * lines below will have to be revisited together with the rest
2112                  * of the ixgbe PMD.
2113                  *
2114                  * TODO:
2115                  *    - Get rid of "volatile" and let the compiler do its job.
2116                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2117                  *      memory ordering below.
2118                  */
2119                 rxdp = &rx_ring[rx_id];
2120                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2121
2122                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2123                         break;
2124
2125                 rxd = *rxdp;
2126
2127                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2128                                   "staterr=0x%x data_len=%u",
2129                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2130                            rte_le_to_cpu_16(rxd.wb.upper.length));
2131
2132                 if (!bulk_alloc) {
2133                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2134                         if (nmb == NULL) {
2135                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2136                                                   "port_id=%u queue_id=%u",
2137                                            rxq->port_id, rxq->queue_id);
2138
2139                                 rte_eth_devices[rxq->port_id].data->
2140                                                         rx_mbuf_alloc_failed++;
2141                                 break;
2142                         }
2143                 } else if (nb_hold > rxq->rx_free_thresh) {
2144                         uint16_t next_rdt = rxq->rx_free_trigger;
2145
2146                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2147                                 rte_wmb();
2148                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2149                                                         rxq->rdt_reg_addr,
2150                                                         next_rdt);
2151                                 nb_hold -= rxq->rx_free_thresh;
2152                         } else {
2153                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2154                                                   "port_id=%u queue_id=%u",
2155                                            rxq->port_id, rxq->queue_id);
2156
2157                                 rte_eth_devices[rxq->port_id].data->
2158                                                         rx_mbuf_alloc_failed++;
2159                                 break;
2160                         }
2161                 }
2162
2163                 nb_hold++;
2164                 rxe = &sw_ring[rx_id];
2165                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2166
2167                 next_id = rx_id + 1;
2168                 if (next_id == rxq->nb_rx_desc)
2169                         next_id = 0;
2170
2171                 /* Prefetch next mbuf while processing current one. */
2172                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2173
2174                 /*
2175                  * When the next RX descriptor is on a cache-line boundary,
2176                  * prefetch the next 4 RX descriptors and the next 4 pointers
2177                  * to mbufs.
2178                  */
2179                 if ((next_id & 0x3) == 0) {
2180                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2181                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2182                 }
2183
2184                 rxm = rxe->mbuf;
2185
2186                 if (!bulk_alloc) {
2187                         __le64 dma =
2188                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2189                         /*
2190                          * Update RX descriptor with the physical address of the
2191                          * new data buffer of the newly allocated mbuf.
2192                          */
2193                         rxe->mbuf = nmb;
2194
2195                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2196                         rxdp->read.hdr_addr = 0;
2197                         rxdp->read.pkt_addr = dma;
2198                 } else
2199                         rxe->mbuf = NULL;
2200
2201                 /*
2202                  * Set the data length of the mbuf.
2203                  */
2204                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2205                 rxm->data_len = data_len;
2206
2207                 if (!eop) {
2208                         uint16_t nextp_id;
2209                         /*
2210                          * Get next descriptor index:
2211                          *  - For RSC it's in the NEXTP field.
2212                          *  - For a scattered packet - it's just a following
2213                          *    descriptor.
2214                          */
2215                         if (ixgbe_rsc_count(&rxd))
2216                                 nextp_id =
2217                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2218                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2219                         else
2220                                 nextp_id = next_id;
2221
2222                         next_sc_entry = &sw_sc_ring[nextp_id];
2223                         next_rxe = &sw_ring[nextp_id];
2224                         rte_ixgbe_prefetch(next_rxe);
2225                 }
2226
2227                 sc_entry = &sw_sc_ring[rx_id];
2228                 first_seg = sc_entry->fbuf;
2229                 sc_entry->fbuf = NULL;
2230
2231                 /*
2232                  * If this is the first buffer of the received packet,
2233                  * set the pointer to the first mbuf of the packet and
2234                  * initialize its context.
2235                  * Otherwise, update the total length and the number of segments
2236                  * of the current scattered packet, and update the pointer to
2237                  * the last mbuf of the current packet.
2238                  */
2239                 if (first_seg == NULL) {
2240                         first_seg = rxm;
2241                         first_seg->pkt_len = data_len;
2242                         first_seg->nb_segs = 1;
2243                 } else {
2244                         first_seg->pkt_len += data_len;
2245                         first_seg->nb_segs++;
2246                 }
2247
2248                 prev_id = rx_id;
2249                 rx_id = next_id;
2250
2251                 /*
2252                  * If this is not the last buffer of the received packet, update
2253                  * the pointer to the first mbuf at the NEXTP entry in the
2254                  * sw_sc_ring and continue to parse the RX ring.
2255                  */
2256                 if (!eop && next_rxe) {
2257                         rxm->next = next_rxe->mbuf;
2258                         next_sc_entry->fbuf = first_seg;
2259                         goto next_desc;
2260                 }
2261
2262                 /* Initialize the first mbuf of the returned packet */
2263                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2264
2265                 /*
2266                  * Deal with the case when HW CRC stripping is disabled.
2267                  * That can't happen when LRO is enabled, but still could
2268                  * happen for scattered RX mode.
2269                  */
2270                 first_seg->pkt_len -= rxq->crc_len;
2271                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2272                         struct rte_mbuf *lp;
2273
2274                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2275                                 ;
2276
2277                         first_seg->nb_segs--;
2278                         lp->data_len -= rxq->crc_len - rxm->data_len;
2279                         lp->next = NULL;
2280                         rte_pktmbuf_free_seg(rxm);
2281                 } else
2282                         rxm->data_len -= rxq->crc_len;
2283
2284                 /* Prefetch data of first segment, if configured to do so. */
2285                 rte_packet_prefetch((char *)first_seg->buf_addr +
2286                         first_seg->data_off);
2287
2288                 /*
2289                  * Store the mbuf address into the next entry of the array
2290                  * of returned packets.
2291                  */
2292                 rx_pkts[nb_rx++] = first_seg;
2293         }
2294
2295         /*
2296          * Record index of the next RX descriptor to probe.
2297          */
2298         rxq->rx_tail = rx_id;
2299
2300         /*
2301          * If the number of free RX descriptors is greater than the RX free
2302          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2303          * register.
2304          * Update the RDT with the value of the last processed RX descriptor
2305          * minus 1, to guarantee that the RDT register is never equal to the
2306          * RDH register, which creates a "full" ring situation from the
2307          * hardware point of view...
2308          */
2309         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2310                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2311                            "nb_hold=%u nb_rx=%u",
2312                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2313
2314                 rte_wmb();
2315                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2316                 nb_hold = 0;
2317         }
2318
2319         rxq->nb_rx_hold = nb_hold;
2320         return nb_rx;
2321 }
2322
2323 uint16_t
2324 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2325                                  uint16_t nb_pkts)
2326 {
2327         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2328 }
2329
2330 uint16_t
2331 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2332                                uint16_t nb_pkts)
2333 {
2334         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2335 }
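
/*
 * Usage sketch (illustrative only, not part of this driver): the LRO receive
 * handlers above are selected by the driver when the port is configured with
 * the TCP LRO Rx offload; the field and macro names below are the ones used
 * by recent DPDK releases and may differ on older ones:
 *
 *      #include <rte_ethdev.h>
 *
 *      struct rte_eth_conf conf = { 0 };
 *
 *      conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
 *      conf.rxmode.max_lro_pkt_size = 9728; // example cap for a coalesced cluster
 *      rte_eth_dev_configure(port_id, 1, 1, &conf);
 */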
2336
2337 /*********************************************************************
2338  *
2339  *  Queue management functions
2340  *
2341  **********************************************************************/
2342
2343 static void __rte_cold
2344 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2345 {
2346         unsigned i;
2347
2348         if (txq->sw_ring != NULL) {
2349                 for (i = 0; i < txq->nb_tx_desc; i++) {
2350                         if (txq->sw_ring[i].mbuf != NULL) {
2351                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2352                                 txq->sw_ring[i].mbuf = NULL;
2353                         }
2354                 }
2355         }
2356 }
2357
2358 static int
2359 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2360 {
2361         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2362         uint16_t i, tx_last, tx_id;
2363         uint16_t nb_tx_free_last;
2364         uint16_t nb_tx_to_clean;
2365         uint32_t pkt_cnt;
2366
2367         /* Start freeing mbufs from the entry following tx_tail */
2368         tx_last = txq->tx_tail;
2369         tx_id  = swr_ring[tx_last].next_id;
2370
2371         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2372                 return 0;
2373
2374         nb_tx_to_clean = txq->nb_tx_free;
2375         nb_tx_free_last = txq->nb_tx_free;
2376         if (!free_cnt)
2377                 free_cnt = txq->nb_tx_desc;
2378
2379         /* Loop through swr_ring to count the number of
2380          * freeable mbufs and packets.
2381          */
2382         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2383                 for (i = 0; i < nb_tx_to_clean &&
2384                         pkt_cnt < free_cnt &&
2385                         tx_id != tx_last; i++) {
2386                         if (swr_ring[tx_id].mbuf != NULL) {
2387                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2388                                 swr_ring[tx_id].mbuf = NULL;
2389
2390                                 /*
2391                                  * last segment in the packet,
2392                                  * increment packet count
2393                                  */
2394                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2395                         }
2396
2397                         tx_id = swr_ring[tx_id].next_id;
2398                 }
2399
2400                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2401                         txq->nb_tx_free || tx_id == tx_last)
2402                         break;
2403
2404                 if (pkt_cnt < free_cnt) {
2405                         if (ixgbe_xmit_cleanup(txq))
2406                                 break;
2407
2408                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2409                         nb_tx_free_last = txq->nb_tx_free;
2410                 }
2411         }
2412
2413         return (int)pkt_cnt;
2414 }
2415
2416 static int
2417 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2418                         uint32_t free_cnt)
2419 {
2420         int i, n, cnt;
2421
2422         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2423                 free_cnt = txq->nb_tx_desc;
2424
2425         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2426
2427         for (i = 0; i < cnt; i += n) {
2428                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2429                         break;
2430
2431                 n = ixgbe_tx_free_bufs(txq);
2432
2433                 if (n == 0)
2434                         break;
2435         }
2436
2437         return i;
2438 }
2439
2440 static int
2441 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2442                         uint32_t free_cnt __rte_unused)
2443 {
2444         return -ENOTSUP;
2445 }
2446
2447 int
2448 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2449 {
2450         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2451         if (txq->offloads == 0 &&
2452 #ifdef RTE_LIB_SECURITY
2453                         !(txq->using_ipsec) &&
2454 #endif
2455                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2456                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2457                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2458                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2459                                         txq->sw_ring_v != NULL)) {
2460                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2461                 } else {
2462                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2463                 }
2464         }
2465
2466         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2467 }
2468
2469 static void __rte_cold
2470 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2471 {
2472         if (txq != NULL &&
2473             txq->sw_ring != NULL)
2474                 rte_free(txq->sw_ring);
2475 }
2476
2477 static void __rte_cold
2478 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2479 {
2480         if (txq != NULL && txq->ops != NULL) {
2481                 txq->ops->release_mbufs(txq);
2482                 txq->ops->free_swring(txq);
2483                 rte_memzone_free(txq->mz);
2484                 rte_free(txq);
2485         }
2486 }
2487
2488 void __rte_cold
2489 ixgbe_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2490 {
2491         ixgbe_tx_queue_release(dev->data->tx_queues[qid]);
2492 }
2493
2494 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2495 static void __rte_cold
2496 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2497 {
2498         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2499         struct ixgbe_tx_entry *txe = txq->sw_ring;
2500         uint16_t prev, i;
2501
2502         /* Zero out HW ring memory */
2503         for (i = 0; i < txq->nb_tx_desc; i++) {
2504                 txq->tx_ring[i] = zeroed_desc;
2505         }
2506
2507         /* Initialize SW ring entries */
2508         prev = (uint16_t) (txq->nb_tx_desc - 1);
2509         for (i = 0; i < txq->nb_tx_desc; i++) {
2510                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2511
2512                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2513                 txe[i].mbuf = NULL;
2514                 txe[i].last_id = i;
2515                 txe[prev].next_id = i;
2516                 prev = i;
2517         }
2518
2519         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2520         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2521
2522         txq->tx_tail = 0;
2523         txq->nb_tx_used = 0;
2524         /*
2525          * Always allow 1 descriptor to be un-allocated to avoid
2526          * a H/W race condition
2527          */
2528         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2529         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2530         txq->ctx_curr = 0;
2531         memset((void *)&txq->ctx_cache, 0,
2532                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2533 }
2534
2535 static const struct ixgbe_txq_ops def_txq_ops = {
2536         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2537         .free_swring = ixgbe_tx_free_swring,
2538         .reset = ixgbe_reset_tx_queue,
2539 };
2540
2541 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2542  * the queue parameters. Used in tx_queue_setup by primary process and then
2543  * in dev_init by secondary process when attaching to an existing ethdev.
2544  */
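/* Selection summary (matches the checks below): the simple/vector paths are
 * only used when the queue has no Tx offloads, no inline IPsec, and
 * tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST; the vector path additionally
 * requires tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ, at least 128-bit
 * SIMD support and a successful ixgbe_txq_vec_setup(). Otherwise the
 * full-featured ixgbe_xmit_pkts() path is chosen.
 */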
2545 void __rte_cold
2546 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2547 {
2548         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2549         if ((txq->offloads == 0) &&
2550 #ifdef RTE_LIB_SECURITY
2551                         !(txq->using_ipsec) &&
2552 #endif
2553                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2554                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2555                 dev->tx_pkt_prepare = NULL;
2556                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2557                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2558                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2559                                         ixgbe_txq_vec_setup(txq) == 0)) {
2560                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2561                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2562                 } else
2563                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2564         } else {
2565                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2566                 PMD_INIT_LOG(DEBUG,
2567                                 " - offloads = 0x%" PRIx64,
2568                                 txq->offloads);
2569                 PMD_INIT_LOG(DEBUG,
2570                                 " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2571                                 (unsigned long)txq->tx_rs_thresh,
2572                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2573                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2574                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2575         }
2576 }
2577
2578 uint64_t
2579 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2580 {
2581         RTE_SET_USED(dev);
2582
2583         return 0;
2584 }
2585
2586 uint64_t
2587 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2588 {
2589         uint64_t tx_offload_capa;
2590         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2591
2592         tx_offload_capa =
2593                 RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
2594                 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM  |
2595                 RTE_ETH_TX_OFFLOAD_UDP_CKSUM   |
2596                 RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
2597                 RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
2598                 RTE_ETH_TX_OFFLOAD_TCP_TSO     |
2599                 RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2600
2601         if (hw->mac.type == ixgbe_mac_82599EB ||
2602             hw->mac.type == ixgbe_mac_X540)
2603                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_MACSEC_INSERT;
2604
2605         if (hw->mac.type == ixgbe_mac_X550 ||
2606             hw->mac.type == ixgbe_mac_X550EM_x ||
2607             hw->mac.type == ixgbe_mac_X550EM_a)
2608                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2609
2610 #ifdef RTE_LIB_SECURITY
2611         if (dev->security_ctx)
2612                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_SECURITY;
2613 #endif
2614         return tx_offload_capa;
2615 }
2616
2617 int __rte_cold
2618 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2619                          uint16_t queue_idx,
2620                          uint16_t nb_desc,
2621                          unsigned int socket_id,
2622                          const struct rte_eth_txconf *tx_conf)
2623 {
2624         const struct rte_memzone *tz;
2625         struct ixgbe_tx_queue *txq;
2626         struct ixgbe_hw     *hw;
2627         uint16_t tx_rs_thresh, tx_free_thresh;
2628         uint64_t offloads;
2629
2630         PMD_INIT_FUNC_TRACE();
2631         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2632
2633         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2634
2635         /*
2636          * Validate number of transmit descriptors.
2637          * It must not exceed hardware maximum, and must be multiple
2638          * of IXGBE_ALIGN.
2639          */
2640         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2641                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2642                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2643                 return -EINVAL;
2644         }
2645
2646         /*
2647          * The following two parameters control the setting of the RS bit on
2648          * transmit descriptors.
2649          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2650          * descriptors have been used.
2651          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2652          * descriptors are used or if the number of descriptors required
2653          * to transmit a packet is greater than the number of free TX
2654          * descriptors.
2655          * The following constraints must be satisfied:
2656          *  tx_rs_thresh must be greater than 0.
2657          *  tx_rs_thresh must be less than the size of the ring minus 2.
2658          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2659          *  tx_rs_thresh must be a divisor of the ring size.
2660          *  tx_free_thresh must be greater than 0.
2661          *  tx_free_thresh must be less than the size of the ring minus 3.
2662          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2663          * One descriptor in the TX ring is used as a sentinel to avoid a
2664          * H/W race condition, hence the maximum threshold constraints.
2665          * When set to zero use default values.
2666          */
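        /*
         * Worked example (illustrative, assuming the usual defaults of 32 for
         * both DEFAULT_TX_FREE_THRESH and DEFAULT_TX_RS_THRESH): with
         * nb_desc = 512 all constraints hold, since 32 > 0, 32 < 510,
         * 32 <= 32, 512 % 32 == 0 and 32 + 32 <= 512.
         */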
2667         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2668                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2669         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2670         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2671                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2672         if (tx_conf->tx_rs_thresh > 0)
2673                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2674         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2675                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2676                              "exceed nb_desc. (tx_rs_thresh=%u "
2677                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2678                              (unsigned int)tx_rs_thresh,
2679                              (unsigned int)tx_free_thresh,
2680                              (unsigned int)nb_desc,
2681                              (int)dev->data->port_id,
2682                              (int)queue_idx);
2683                 return -(EINVAL);
2684         }
2685         if (tx_rs_thresh >= (nb_desc - 2)) {
2686                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2687                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2688                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2689                         (int)dev->data->port_id, (int)queue_idx);
2690                 return -(EINVAL);
2691         }
2692         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2693                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2694                         "(tx_rs_thresh=%u port=%d queue=%d)",
2695                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2696                         (int)dev->data->port_id, (int)queue_idx);
2697                 return -(EINVAL);
2698         }
2699         if (tx_free_thresh >= (nb_desc - 3)) {
2700                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2701                              "number of TX descriptors minus 3. "
2702                              "(tx_free_thresh=%u "
2703                              "port=%d queue=%d)",
2704                              (unsigned int)tx_free_thresh,
2705                              (int)dev->data->port_id, (int)queue_idx);
2706                 return -(EINVAL);
2707         }
2708         if (tx_rs_thresh > tx_free_thresh) {
2709                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2710                              "tx_free_thresh. (tx_free_thresh=%u "
2711                              "tx_rs_thresh=%u port=%d queue=%d)",
2712                              (unsigned int)tx_free_thresh,
2713                              (unsigned int)tx_rs_thresh,
2714                              (int)dev->data->port_id,
2715                              (int)queue_idx);
2716                 return -(EINVAL);
2717         }
2718         if ((nb_desc % tx_rs_thresh) != 0) {
2719                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2720                              "number of TX descriptors. (tx_rs_thresh=%u "
2721                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2722                              (int)dev->data->port_id, (int)queue_idx);
2723                 return -(EINVAL);
2724         }
2725
2726         /*
2727          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2728          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2729          * by the NIC and all descriptors are written back after the NIC
2730          * accumulates WTHRESH descriptors.
2731          */
2732         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2733                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2734                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2735                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2736                              (int)dev->data->port_id, (int)queue_idx);
2737                 return -(EINVAL);
2738         }
2739
2740         /* Free memory prior to re-allocation if needed... */
2741         if (dev->data->tx_queues[queue_idx] != NULL) {
2742                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2743                 dev->data->tx_queues[queue_idx] = NULL;
2744         }
2745
2746         /* First allocate the tx queue data structure */
2747         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2748                                  RTE_CACHE_LINE_SIZE, socket_id);
2749         if (txq == NULL)
2750                 return -ENOMEM;
2751
2752         /*
2753          * Allocate TX ring hardware descriptors. A memzone large enough to
2754          * handle the maximum ring size is allocated in order to allow for
2755          * resizing in later calls to the queue setup function.
2756          */
2757         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2758                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2759                         IXGBE_ALIGN, socket_id);
2760         if (tz == NULL) {
2761                 ixgbe_tx_queue_release(txq);
2762                 return -ENOMEM;
2763         }
2764
2765         txq->mz = tz;
2766         txq->nb_tx_desc = nb_desc;
2767         txq->tx_rs_thresh = tx_rs_thresh;
2768         txq->tx_free_thresh = tx_free_thresh;
2769         txq->pthresh = tx_conf->tx_thresh.pthresh;
2770         txq->hthresh = tx_conf->tx_thresh.hthresh;
2771         txq->wthresh = tx_conf->tx_thresh.wthresh;
2772         txq->queue_id = queue_idx;
2773         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2774                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2775         txq->port_id = dev->data->port_id;
2776         txq->offloads = offloads;
2777         txq->ops = &def_txq_ops;
2778         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2779 #ifdef RTE_LIB_SECURITY
2780         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2781                         RTE_ETH_TX_OFFLOAD_SECURITY);
2782 #endif
2783
2784         /*
2785          * Use VFTDT as the tail pointer register when the device is a VF
2786          */
2787         if (hw->mac.type == ixgbe_mac_82599_vf ||
2788             hw->mac.type == ixgbe_mac_X540_vf ||
2789             hw->mac.type == ixgbe_mac_X550_vf ||
2790             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2791             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2792                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2793         else
2794                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2795
2796         txq->tx_ring_phys_addr = tz->iova;
2797         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2798
2799         /* Allocate software ring */
2800         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2801                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2802                                 RTE_CACHE_LINE_SIZE, socket_id);
2803         if (txq->sw_ring == NULL) {
2804                 ixgbe_tx_queue_release(txq);
2805                 return -ENOMEM;
2806         }
2807         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2808                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2809
2810         /* set up vector or scalar TX function as appropriate */
2811         ixgbe_set_tx_function(dev, txq);
2812
2813         txq->ops->reset(txq);
2814
2815         dev->data->tx_queues[queue_idx] = txq;
2816
2817
2818         return 0;
2819 }
2820
2821 /**
2822  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2823  *
2824  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2825  * in the sw_rsc_ring is not set to NULL but rather points to the next
2826  * mbuf of this RSC aggregation (that has not been completed yet and still
2827  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2828  * just free the first "nb_segs" segments of the cluster explicitly by
2829  * calling rte_pktmbuf_free_seg().
2830  *
2831  * @m scattered cluster head
2832  */
2833 static void __rte_cold
2834 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2835 {
2836         uint16_t i, nb_segs = m->nb_segs;
2837         struct rte_mbuf *next_seg;
2838
2839         for (i = 0; i < nb_segs; i++) {
2840                 next_seg = m->next;
2841                 rte_pktmbuf_free_seg(m);
2842                 m = next_seg;
2843         }
2844 }
2845
2846 static void __rte_cold
2847 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2848 {
2849         unsigned i;
2850
2851         /* SSE Vector driver has a different way of releasing mbufs. */
2852         if (rxq->rx_using_sse) {
2853                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2854                 return;
2855         }
2856
2857         if (rxq->sw_ring != NULL) {
2858                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2859                         if (rxq->sw_ring[i].mbuf != NULL) {
2860                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2861                                 rxq->sw_ring[i].mbuf = NULL;
2862                         }
2863                 }
2864                 if (rxq->rx_nb_avail) {
2865                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2866                                 struct rte_mbuf *mb;
2867
2868                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2869                                 rte_pktmbuf_free_seg(mb);
2870                         }
2871                         rxq->rx_nb_avail = 0;
2872                 }
2873         }
2874
2875         if (rxq->sw_sc_ring)
2876                 for (i = 0; i < rxq->nb_rx_desc; i++)
2877                         if (rxq->sw_sc_ring[i].fbuf) {
2878                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2879                                 rxq->sw_sc_ring[i].fbuf = NULL;
2880                         }
2881 }
2882
2883 static void __rte_cold
2884 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2885 {
2886         if (rxq != NULL) {
2887                 ixgbe_rx_queue_release_mbufs(rxq);
2888                 rte_free(rxq->sw_ring);
2889                 rte_free(rxq->sw_sc_ring);
2890                 rte_memzone_free(rxq->mz);
2891                 rte_free(rxq);
2892         }
2893 }
2894
2895 void __rte_cold
2896 ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2897 {
2898         ixgbe_rx_queue_release(dev->data->rx_queues[qid]);
2899 }
2900
2901 /*
2902  * Check if Rx Burst Bulk Alloc function can be used.
2903  * Return
2904  *        0: the preconditions are satisfied and the bulk allocation function
2905  *           can be used.
2906  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2907  *           function must be used.
2908  */
2909 static inline int __rte_cold
2910 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2911 {
2912         int ret = 0;
2913
2914         /*
2915          * Make sure the following pre-conditions are satisfied:
2916          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2917          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2918          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2919          * Scattered packets are not supported.  This should be checked
2920          * outside of this function.
2921          */
2922         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2923                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2924                              "rxq->rx_free_thresh=%d, "
2925                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2926                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2927                 ret = -EINVAL;
2928         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2929                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2930                              "rxq->rx_free_thresh=%d, "
2931                              "rxq->nb_rx_desc=%d",
2932                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2933                 ret = -EINVAL;
2934         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2935                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2936                              "rxq->nb_rx_desc=%d, "
2937                              "rxq->rx_free_thresh=%d",
2938                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2939                 ret = -EINVAL;
2940         }
2941
2942         return ret;
2943 }
2944
2945 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2946 static void __rte_cold
2947 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2948 {
2949         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2950         unsigned i;
2951         uint16_t len = rxq->nb_rx_desc;
2952
2953         /*
2954          * By default, the Rx queue setup function allocates enough memory for
2955          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2956          * extra memory at the end of the descriptor ring to be zero'd out.
2957          */
2958         if (adapter->rx_bulk_alloc_allowed)
2959                 /* zero out extra memory */
2960                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2961
2962         /*
2963          * Zero out HW ring memory. Zero out extra memory at the end of
2964          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2965          * reads extra memory as zeros.
2966          */
2967         for (i = 0; i < len; i++) {
2968                 rxq->rx_ring[i] = zeroed_desc;
2969         }
2970
2971         /*
2972          * initialize extra software ring entries. Space for these extra
2973          * entries is always allocated
2974          */
2975         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2976         for (i = rxq->nb_rx_desc; i < len; ++i) {
2977                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2978         }
2979
2980         rxq->rx_nb_avail = 0;
2981         rxq->rx_next_avail = 0;
2982         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2983         rxq->rx_tail = 0;
2984         rxq->nb_rx_hold = 0;
2985
2986         if (rxq->pkt_first_seg != NULL)
2987                 rte_pktmbuf_free(rxq->pkt_first_seg);
2988
2989         rxq->pkt_first_seg = NULL;
2990         rxq->pkt_last_seg = NULL;
2991
2992 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2993         rxq->rxrearm_start = 0;
2994         rxq->rxrearm_nb = 0;
2995 #endif
2996 }
2997
2998 static int
2999 ixgbe_is_vf(struct rte_eth_dev *dev)
3000 {
3001         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3002
3003         switch (hw->mac.type) {
3004         case ixgbe_mac_82599_vf:
3005         case ixgbe_mac_X540_vf:
3006         case ixgbe_mac_X550_vf:
3007         case ixgbe_mac_X550EM_x_vf:
3008         case ixgbe_mac_X550EM_a_vf:
3009                 return 1;
3010         default:
3011                 return 0;
3012         }
3013 }
3014
3015 uint64_t
3016 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3017 {
3018         uint64_t offloads = 0;
3019         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3020
3021         if (hw->mac.type != ixgbe_mac_82598EB)
3022                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3023
3024         return offloads;
3025 }
3026
3027 uint64_t
3028 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3029 {
3030         uint64_t offloads;
3031         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3032
3033         offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM  |
3034                    RTE_ETH_RX_OFFLOAD_UDP_CKSUM   |
3035                    RTE_ETH_RX_OFFLOAD_TCP_CKSUM   |
3036                    RTE_ETH_RX_OFFLOAD_KEEP_CRC    |
3037                    RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
3038                    RTE_ETH_RX_OFFLOAD_SCATTER |
3039                    RTE_ETH_RX_OFFLOAD_RSS_HASH;
3040
3041         if (hw->mac.type == ixgbe_mac_82598EB)
3042                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3043
3044         if (ixgbe_is_vf(dev) == 0)
3045                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_EXTEND;
3046
3047         /*
3048          * RSC is only supported by 82599, x540 and x550 PF devices in
3049          * non-SR-IOV mode.
3050          */
3051         if ((hw->mac.type == ixgbe_mac_82599EB ||
3052              hw->mac.type == ixgbe_mac_X540 ||
3053              hw->mac.type == ixgbe_mac_X550) &&
3054             !RTE_ETH_DEV_SRIOV(dev).active)
3055                 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3056
3057         if (hw->mac.type == ixgbe_mac_82599EB ||
3058             hw->mac.type == ixgbe_mac_X540)
3059                 offloads |= RTE_ETH_RX_OFFLOAD_MACSEC_STRIP;
3060
3061         if (hw->mac.type == ixgbe_mac_X550 ||
3062             hw->mac.type == ixgbe_mac_X550EM_x ||
3063             hw->mac.type == ixgbe_mac_X550EM_a)
3064                 offloads |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3065
3066 #ifdef RTE_LIB_SECURITY
3067         if (dev->security_ctx)
3068                 offloads |= RTE_ETH_RX_OFFLOAD_SECURITY;
3069 #endif
3070
3071         return offloads;
3072 }
3073
3074 int __rte_cold
3075 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3076                          uint16_t queue_idx,
3077                          uint16_t nb_desc,
3078                          unsigned int socket_id,
3079                          const struct rte_eth_rxconf *rx_conf,
3080                          struct rte_mempool *mp)
3081 {
3082         const struct rte_memzone *rz;
3083         struct ixgbe_rx_queue *rxq;
3084         struct ixgbe_hw     *hw;
3085         uint16_t len;
3086         struct ixgbe_adapter *adapter = dev->data->dev_private;
3087         uint64_t offloads;
3088
3089         PMD_INIT_FUNC_TRACE();
3090         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3091
3092         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3093
3094         /*
3095          * Validate number of receive descriptors.
3096          * It must not exceed hardware maximum, and must be multiple
3097          * of IXGBE_ALIGN.
3098          */
3099         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3100                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3101                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3102                 return -EINVAL;
3103         }
3104
3105         /* Free memory prior to re-allocation if needed... */
3106         if (dev->data->rx_queues[queue_idx] != NULL) {
3107                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3108                 dev->data->rx_queues[queue_idx] = NULL;
3109         }
3110
3111         /* First allocate the rx queue data structure */
3112         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3113                                  RTE_CACHE_LINE_SIZE, socket_id);
3114         if (rxq == NULL)
3115                 return -ENOMEM;
3116         rxq->mb_pool = mp;
3117         rxq->nb_rx_desc = nb_desc;
3118         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3119         rxq->queue_id = queue_idx;
3120         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3121                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3122         rxq->port_id = dev->data->port_id;
3123         if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
3124                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3125         else
3126                 rxq->crc_len = 0;
3127         rxq->drop_en = rx_conf->rx_drop_en;
3128         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3129         rxq->offloads = offloads;
3130
3131         /*
3132          * The packet type in RX descriptor is different for different NICs.
3133          * Some bits are used for x550 but reserved for other NICs.
3134          * So set different masks for different NICs.
3135          */
3136         if (hw->mac.type == ixgbe_mac_X550 ||
3137             hw->mac.type == ixgbe_mac_X550EM_x ||
3138             hw->mac.type == ixgbe_mac_X550EM_a ||
3139             hw->mac.type == ixgbe_mac_X550_vf ||
3140             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3141             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3142                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3143         else
3144                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3145
3146         /*
3147          * 82599 errata, UDP frames with a 0 checksum can be marked as checksum
3148          * errors.
3149          */
3150         if (hw->mac.type == ixgbe_mac_82599EB)
3151                 rxq->rx_udp_csum_zero_err = 1;
3152
3153         /*
3154          * Allocate RX ring hardware descriptors. A memzone large enough to
3155          * handle the maximum ring size is allocated in order to allow for
3156          * resizing in later calls to the queue setup function.
3157          */
3158         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3159                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3160         if (rz == NULL) {
3161                 ixgbe_rx_queue_release(rxq);
3162                 return -ENOMEM;
3163         }
3164
3165         rxq->mz = rz;
3166         /*
3167          * Zero init all the descriptors in the ring.
3168          */
3169         memset(rz->addr, 0, RX_RING_SZ);
3170
3171         /*
3172          * Use VFRDT/VFRDH as the ring registers when the device is a VF
3173          */
3174         if (hw->mac.type == ixgbe_mac_82599_vf ||
3175             hw->mac.type == ixgbe_mac_X540_vf ||
3176             hw->mac.type == ixgbe_mac_X550_vf ||
3177             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3178             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3179                 rxq->rdt_reg_addr =
3180                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3181                 rxq->rdh_reg_addr =
3182                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3183         } else {
3184                 rxq->rdt_reg_addr =
3185                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3186                 rxq->rdh_reg_addr =
3187                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3188         }
3189
3190         rxq->rx_ring_phys_addr = rz->iova;
3191         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3192
3193         /*
3194          * Certain constraints must be met in order to use the bulk buffer
3195          * allocation Rx burst function. If any of the Rx queues doesn't meet
3196          * them, the feature is disabled for the whole port.
3197          */
3198         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3199                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3200                                     "preconditions - canceling the feature for "
3201                                     "the whole port[%d]",
3202                              rxq->queue_id, rxq->port_id);
3203                 adapter->rx_bulk_alloc_allowed = false;
3204         }
3205
3206         /*
3207          * Allocate software ring. Allow for space at the end of the
3208          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3209          * function does not access an invalid memory region.
3210          */
3211         len = nb_desc;
3212         if (adapter->rx_bulk_alloc_allowed)
3213                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3214
3215         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3216                                           sizeof(struct ixgbe_rx_entry) * len,
3217                                           RTE_CACHE_LINE_SIZE, socket_id);
3218         if (!rxq->sw_ring) {
3219                 ixgbe_rx_queue_release(rxq);
3220                 return -ENOMEM;
3221         }
3222
3223         /*
3224          * Always allocate even if it's not going to be needed in order to
3225          * simplify the code.
3226          *
3227          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3228          * be requested in ixgbe_dev_rx_init(), which is called later from
3229          * dev_start() flow.
3230          */
3231         rxq->sw_sc_ring =
3232                 rte_zmalloc_socket("rxq->sw_sc_ring",
3233                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3234                                    RTE_CACHE_LINE_SIZE, socket_id);
3235         if (!rxq->sw_sc_ring) {
3236                 ixgbe_rx_queue_release(rxq);
3237                 return -ENOMEM;
3238         }
3239
3240         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3241                             "dma_addr=0x%"PRIx64,
3242                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3243                      rxq->rx_ring_phys_addr);
3244
3245         if (!rte_is_power_of_2(nb_desc)) {
3246                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3247                                     "preconditions - canceling the feature for "
3248                                     "the whole port[%d]",
3249                              rxq->queue_id, rxq->port_id);
3250                 adapter->rx_vec_allowed = false;
3251         } else
3252                 ixgbe_rxq_vec_setup(rxq);
3253
3254         dev->data->rx_queues[queue_idx] = rxq;
3255
3256         ixgbe_reset_rx_queue(adapter, rxq);
3257
3258         return 0;
3259 }
3260
3261 uint32_t
3262 ixgbe_dev_rx_queue_count(void *rx_queue)
3263 {
3264 #define IXGBE_RXQ_SCAN_INTERVAL 4
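        /*
         * Walk the ring from rx_tail in steps of IXGBE_RXQ_SCAN_INTERVAL and
         * stop at the first descriptor whose DD bit is not yet set; the value
         * returned is therefore a coarse estimate rounded to a multiple of the
         * scan interval.
         */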
3265         volatile union ixgbe_adv_rx_desc *rxdp;
3266         struct ixgbe_rx_queue *rxq;
3267         uint32_t desc = 0;
3268
3269         rxq = rx_queue;
3270         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3271
3272         while ((desc < rxq->nb_rx_desc) &&
3273                 (rxdp->wb.upper.status_error &
3274                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3275                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3276                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3277                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3278                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3279                                 desc - rxq->nb_rx_desc]);
3280         }
3281
3282         return desc;
3283 }
3284
3285 int
3286 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3287 {
3288         struct ixgbe_rx_queue *rxq = rx_queue;
3289         volatile uint32_t *status;
3290         uint32_t nb_hold, desc;
3291
3292         if (unlikely(offset >= rxq->nb_rx_desc))
3293                 return -EINVAL;
3294
3295 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3296         if (rxq->rx_using_sse)
3297                 nb_hold = rxq->rxrearm_nb;
3298         else
3299 #endif
3300                 nb_hold = rxq->nb_rx_hold;
3301         if (offset >= rxq->nb_rx_desc - nb_hold)
3302                 return RTE_ETH_RX_DESC_UNAVAIL;
3303
3304         desc = rxq->rx_tail + offset;
3305         if (desc >= rxq->nb_rx_desc)
3306                 desc -= rxq->nb_rx_desc;
3307
3308         status = &rxq->rx_ring[desc].wb.upper.status_error;
3309         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3310                 return RTE_ETH_RX_DESC_DONE;
3311
3312         return RTE_ETH_RX_DESC_AVAIL;
3313 }
3314
3315 int
3316 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3317 {
3318         struct ixgbe_tx_queue *txq = tx_queue;
3319         volatile uint32_t *status;
3320         uint32_t desc;
3321
3322         if (unlikely(offset >= txq->nb_tx_desc))
3323                 return -EINVAL;
3324
3325         desc = txq->tx_tail + offset;
3326         /* go to next desc that has the RS bit */
3327         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3328                 txq->tx_rs_thresh;
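        /*
         * This rounds desc up to the next multiple of tx_rs_thresh, since only
         * descriptors carrying the RS bit have their DD status written back.
         * E.g. (illustrative) with tx_rs_thresh = 32 and tx_tail = 0, offsets
         * 1..32 all map to descriptor 32.
         */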
3329         if (desc >= txq->nb_tx_desc) {
3330                 desc -= txq->nb_tx_desc;
3331                 if (desc >= txq->nb_tx_desc)
3332                         desc -= txq->nb_tx_desc;
3333         }
3334
3335         status = &txq->tx_ring[desc].wb.status;
3336         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3337                 return RTE_ETH_TX_DESC_DONE;
3338
3339         return RTE_ETH_TX_DESC_FULL;
3340 }
3341
3342 /*
3343  * Set up link loopback for X540/X550 mode Tx->Rx.
3344  */
3345 static inline void __rte_cold
3346 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3347 {
3348         uint32_t macc;
3349         PMD_INIT_FUNC_TRACE();
3350
3351         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3352
3353         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3354                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3355         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3356
3357         if (enable) {
3358                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3359                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3360                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3361                 macc |= IXGBE_MACC_FLU;
3362         } else {
3363                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3364                 macc &= ~IXGBE_MACC_FLU;
3365         }
3366
3367         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3368                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3369
3370         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3371 }
3372
3373 void __rte_cold
3374 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3375 {
3376         unsigned i;
3377         struct ixgbe_adapter *adapter = dev->data->dev_private;
3378         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3379
3380         PMD_INIT_FUNC_TRACE();
3381
3382         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3383                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3384
3385                 if (txq != NULL) {
3386                         txq->ops->release_mbufs(txq);
3387                         txq->ops->reset(txq);
3388                 }
3389         }
3390
3391         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3392                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3393
3394                 if (rxq != NULL) {
3395                         ixgbe_rx_queue_release_mbufs(rxq);
3396                         ixgbe_reset_rx_queue(adapter, rxq);
3397                 }
3398         }
3399         /* If loopback mode was enabled, reconfigure the link accordingly */
3400         if (dev->data->dev_conf.lpbk_mode != 0) {
3401                 if (hw->mac.type == ixgbe_mac_X540 ||
3402                      hw->mac.type == ixgbe_mac_X550 ||
3403                      hw->mac.type == ixgbe_mac_X550EM_x ||
3404                      hw->mac.type == ixgbe_mac_X550EM_a)
3405                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3406         }
3407 }
3408
3409 void
3410 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3411 {
3412         unsigned i;
3413
3414         PMD_INIT_FUNC_TRACE();
3415
3416         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3417                 ixgbe_dev_rx_queue_release(dev, i);
3418                 dev->data->rx_queues[i] = NULL;
3419         }
3420         dev->data->nb_rx_queues = 0;
3421
3422         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3423                 ixgbe_dev_tx_queue_release(dev, i);
3424                 dev->data->tx_queues[i] = NULL;
3425         }
3426         dev->data->nb_tx_queues = 0;
3427 }
3428
3429 /*********************************************************************
3430  *
3431  *  Device RX/TX init functions
3432  *
3433  **********************************************************************/
3434
3435 /**
3436  * Receive Side Scaling (RSS)
3437  * See section 7.1.2.8 in the following document:
3438  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3439  *
3440  * Principles:
3441  * The source and destination IP addresses of the IP header and the source
3442  * and destination ports of TCP/UDP headers, if any, of received packets are
3443  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3444  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3445  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3446  * RSS output index which is used as the RX queue index where to store the
3447  * received packets.
3448  * The following output is supplied in the RX write-back descriptor:
3449  *     - 32-bit result of the Microsoft RSS hash function,
3450  *     - 4-bit RSS type field.
3451  */
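/*
 * Worked example (illustrative only): a hash result of 0x12345678 has low
 * seven bits 0x78 = 120, so RETA[120] supplies the RSS output index that
 * selects the destination Rx queue.
 */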
3452
3453 /*
3454  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3455  * Used as the default key.
3456  */
3457 static uint8_t rss_intel_key[40] = {
3458         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3459         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3460         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3461         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3462         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3463 };
3464
3465 static void
3466 ixgbe_rss_disable(struct rte_eth_dev *dev)
3467 {
3468         struct ixgbe_hw *hw;
3469         uint32_t mrqc;
3470         uint32_t mrqc_reg;
3471
3472         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3473         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3474         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3475         mrqc &= ~IXGBE_MRQC_RSSEN;
3476         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3477 }
3478
3479 static void
3480 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3481 {
3482         uint8_t  *hash_key;
3483         uint32_t mrqc;
3484         uint32_t rss_key;
3485         uint64_t rss_hf;
3486         uint16_t i;
3487         uint32_t mrqc_reg;
3488         uint32_t rssrk_reg;
3489
3490         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3491         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3492
3493         hash_key = rss_conf->rss_key;
3494         if (hash_key != NULL) {
3495                 /* Fill in RSS hash key */
3496                 for (i = 0; i < 10; i++) {
3497                         rss_key  = hash_key[(i * 4)];
3498                         rss_key |= hash_key[(i * 4) + 1] << 8;
3499                         rss_key |= hash_key[(i * 4) + 2] << 16;
3500                         rss_key |= hash_key[(i * 4) + 3] << 24;
3501                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3502                 }
3503         }
3504
3505         /* Set configured hashing protocols in MRQC register */
3506         rss_hf = rss_conf->rss_hf;
3507         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3508         if (rss_hf & RTE_ETH_RSS_IPV4)
3509                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3510         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
3511                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3512         if (rss_hf & RTE_ETH_RSS_IPV6)
3513                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3514         if (rss_hf & RTE_ETH_RSS_IPV6_EX)
3515                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3516         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
3517                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3518         if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
3519                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3520         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
3521                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3522         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
3523                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3524         if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
3525                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3526         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3527 }
3528
3529 int
3530 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3531                           struct rte_eth_rss_conf *rss_conf)
3532 {
3533         struct ixgbe_hw *hw;
3534         uint32_t mrqc;
3535         uint64_t rss_hf;
3536         uint32_t mrqc_reg;
3537
3538         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3539
3540         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3541                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3542                         "NIC.");
3543                 return -ENOTSUP;
3544         }
3545         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3546
3547         /*
3548          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3549          *     "RSS enabling cannot be done dynamically while it must be
3550          *      preceded by a software reset"
3551          * Before changing anything, first check that the update RSS operation
3552          * does not attempt to disable RSS, if RSS was enabled at
3553          * initialization time, or does not attempt to enable RSS, if RSS was
3554          * disabled at initialization time.
3555          */
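        /*
         * For example, if RSS was left disabled at initialization time
         * (RSSEN clear in MRQC), a later request with a non-zero rss_hf is
         * rejected with -EINVAL below instead of enabling RSS on the fly.
         */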
3556         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3557         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3558         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3559                 if (rss_hf != 0) /* Enable RSS */
3560                         return -(EINVAL);
3561                 return 0; /* Nothing to do */
3562         }
3563         /* RSS enabled */
3564         if (rss_hf == 0) /* Disable RSS */
3565                 return -(EINVAL);
3566         ixgbe_hw_rss_hash_set(hw, rss_conf);
3567         return 0;
3568 }
3569
3570 int
3571 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3572                             struct rte_eth_rss_conf *rss_conf)
3573 {
3574         struct ixgbe_hw *hw;
3575         uint8_t *hash_key;
3576         uint32_t mrqc;
3577         uint32_t rss_key;
3578         uint64_t rss_hf;
3579         uint16_t i;
3580         uint32_t mrqc_reg;
3581         uint32_t rssrk_reg;
3582
3583         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3584         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3585         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3586         hash_key = rss_conf->rss_key;
3587         if (hash_key != NULL) {
3588                 /* Return RSS hash key */
3589                 for (i = 0; i < 10; i++) {
3590                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3591                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3592                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3593                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3594                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3595                 }
3596         }
3597
3598         /* Get RSS functions configured in MRQC register */
3599         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3600         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3601                 rss_conf->rss_hf = 0;
3602                 return 0;
3603         }
3604         rss_hf = 0;
3605         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3606                 rss_hf |= RTE_ETH_RSS_IPV4;
3607         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3608                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
3609         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3610                 rss_hf |= RTE_ETH_RSS_IPV6;
3611         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3612                 rss_hf |= RTE_ETH_RSS_IPV6_EX;
3613         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3614                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
3615         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3616                 rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX;
3617         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3618                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
3619         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3620                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
3621         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3622                 rss_hf |= RTE_ETH_RSS_IPV6_UDP_EX;
3623         rss_conf->rss_hf = rss_hf;
3624         return 0;
3625 }
3626
3627 static void
3628 ixgbe_rss_configure(struct rte_eth_dev *dev)
3629 {
3630         struct rte_eth_rss_conf rss_conf;
3631         struct ixgbe_adapter *adapter;
3632         struct ixgbe_hw *hw;
3633         uint32_t reta;
3634         uint16_t i;
3635         uint16_t j;
3636         uint16_t sp_reta_size;
3637         uint32_t reta_reg;
3638
3639         PMD_INIT_FUNC_TRACE();
3640         adapter = dev->data->dev_private;
3641         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3642
3643         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3644
3645         /*
3646          * Fill in redirection table
3647          * The byte-swap is needed because NIC registers are in
3648          * little-endian order.
3649          */
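        /*
         * Example (illustrative): with 4 Rx queues the RETA entries repeat the
         * pattern 0,1,2,3; one byte is accumulated per entry and every fourth
         * iteration flushes a full 4-entry register.
         */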
3650         if (adapter->rss_reta_updated == 0) {
3651                 reta = 0;
3652                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3653                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3654
3655                         if (j == dev->data->nb_rx_queues)
3656                                 j = 0;
3657                         reta = (reta << 8) | j;
3658                         if ((i & 3) == 3)
3659                                 IXGBE_WRITE_REG(hw, reta_reg,
3660                                                 rte_bswap32(reta));
3661                 }
3662         }
3663
3664         /*
3665          * Configure the RSS key and the RSS protocols used to compute
3666          * the RSS hash of input packets.
3667          */
3668         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3669         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3670                 ixgbe_rss_disable(dev);
3671                 return;
3672         }
3673         if (rss_conf.rss_key == NULL)
3674                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3675         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3676 }
3677
3678 #define NUM_VFTA_REGISTERS 128
3679 #define NIC_RX_BUFFER_SIZE 0x200
3680 #define X550_RX_BUFFER_SIZE 0x180
3681
3682 static void
3683 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3684 {
3685         struct rte_eth_vmdq_dcb_conf *cfg;
3686         struct ixgbe_hw *hw;
3687         enum rte_eth_nb_pools num_pools;
3688         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3689         uint16_t pbsize;
3690         uint8_t nb_tcs; /* number of traffic classes */
3691         int i;
3692
3693         PMD_INIT_FUNC_TRACE();
3694         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3695         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3696         num_pools = cfg->nb_queue_pools;
3697         /* Check we have a valid number of pools */
3698         if (num_pools != RTE_ETH_16_POOLS && num_pools != RTE_ETH_32_POOLS) {
3699                 ixgbe_rss_disable(dev);
3700                 return;
3701         }
3702         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3703         nb_tcs = (uint8_t)(RTE_ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3704
3705         /*
3706          * RXPBSIZE
3707          * split rx buffer up into sections, each for 1 traffic class
3708          */
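        /*
         * E.g. (illustrative) with 32 pools -> 4 TCs on a non-X550 MAC, each
         * TC gets pbsize = NIC_RX_BUFFER_SIZE / 4 = 0x80 of the packet buffer.
         */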
3709         switch (hw->mac.type) {
3710         case ixgbe_mac_X550:
3711         case ixgbe_mac_X550EM_x:
3712         case ixgbe_mac_X550EM_a:
3713                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3714                 break;
3715         default:
3716                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3717                 break;
3718         }
3719         for (i = 0; i < nb_tcs; i++) {
3720                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3721
3722                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3723                 /* clear 10 bits. */
3724                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3725                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3726         }
3727         /* zero alloc all unused TCs */
3728         for (i = nb_tcs; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3729                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3730
3731                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3732                 /* clear 10 bits. */
3733                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3734         }
3735
3736         /* MRQC: enable vmdq and dcb */
3737         mrqc = (num_pools == RTE_ETH_16_POOLS) ?
3738                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3739         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3740
3741         /* PFVTCTL: turn on virtualisation and set the default pool */
3742         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3743         if (cfg->enable_default_pool) {
3744                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3745         } else {
3746                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3747         }
3748
3749         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3750
3751         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3752         queue_mapping = 0;
3753         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
3754                 /*
3755                  * mapping is done with 3 bits per priority,
3756                  * so shift by i*3 each time
3757                  */
3758                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3759
3760         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3761
3762         /* RTRPCS: DCB related */
3763         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3764
3765         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3766         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3767         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3768         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3769
3770         /* VFTA - enable all vlan filters */
3771         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3772                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3773         }
3774
3775         /* VFRE: pool enabling for receive - 16 or 32 */
3776         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3777                         num_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3778
3779         /*
3780          * MPSAR - allow pools to read specific mac addresses
3781          * In this case, all pools should be able to read from mac addr 0
3782          */
3783         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3784         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3785
3786         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3787         for (i = 0; i < cfg->nb_pool_maps; i++) {
3788                 /* set vlan id in VF register and set the valid bit */
3789                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3790                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3791                 /*
3792                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3793                  * pools, we only need to use the first half of the register
3794                  * i.e. bits 0-31
3795                  */
3796                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3797         }
3798 }
3799
3800 /**
3801  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3802  * @dev: pointer to eth_dev structure
3803  * @dcb_config: pointer to ixgbe_dcb_config structure
3804  */
3805 static void
3806 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3807                        struct ixgbe_dcb_config *dcb_config)
3808 {
3809         uint32_t reg;
3810         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3811
3812         PMD_INIT_FUNC_TRACE();
3813         if (hw->mac.type != ixgbe_mac_82598EB) {
3814                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3815                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3816                 reg |= IXGBE_RTTDCS_ARBDIS;
3817                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3818
3819                 /* Enable DCB for Tx with 8 or 4 TCs */
3820                 if (dcb_config->num_tcs.pg_tcs == 8) {
3821                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3822                 } else {
3823                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3824                 }
3825                 if (dcb_config->vt_mode)
3826                         reg |= IXGBE_MTQC_VT_ENA;
3827                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3828
3829                 /* Enable the Tx desc arbiter */
3830                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3831                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3832                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3833
3834                 /* Enable Security TX Buffer IFG for DCB */
3835                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3836                 reg |= IXGBE_SECTX_DCB;
3837                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3838         }
3839 }
3840
3841 /**
3842  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3843  * @dev: pointer to rte_eth_dev structure
3844  * @dcb_config: pointer to ixgbe_dcb_config structure
3845  */
3846 static void
3847 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3848                         struct ixgbe_dcb_config *dcb_config)
3849 {
3850         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3851                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3852         struct ixgbe_hw *hw =
3853                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3854
3855         PMD_INIT_FUNC_TRACE();
3856         if (hw->mac.type != ixgbe_mac_82598EB)
3857                 /*PF VF Transmit Enable*/
3858                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3859                         vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3860
3861         /*Configure general DCB TX parameters*/
3862         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3863 }
3864
3865 static void
3866 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3867                         struct ixgbe_dcb_config *dcb_config)
3868 {
3869         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3870                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3871         struct ixgbe_dcb_tc_config *tc;
3872         uint8_t i, j;
3873
3874         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3875         if (vmdq_rx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3876                 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3877                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3878         } else {
3879                 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3880                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3881         }
3882
3883         /* Initialize User Priority to Traffic Class mapping */
3884         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3885                 tc = &dcb_config->tc_config[j];
3886                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3887         }
3888
3889         /* User Priority to Traffic Class mapping */
3890         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3891                 j = vmdq_rx_conf->dcb_tc[i];
3892                 tc = &dcb_config->tc_config[j];
3893                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3894                                                 (uint8_t)(1 << i);
3895         }
3896 }
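/*
 * For example, with vmdq_rx_conf->dcb_tc = {0, 0, 0, 0, 1, 1, 1, 1} the loop
 * above gives TC0 an up_to_tc_bitmap of 0x0F (priorities 0-3) and TC1 a
 * bitmap of 0xF0 (priorities 4-7); the same pattern is used by the other
 * *_rx_config/*_tx_config helpers below.
 */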
3897
3898 static void
3899 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3900                         struct ixgbe_dcb_config *dcb_config)
3901 {
3902         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3903                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3904         struct ixgbe_dcb_tc_config *tc;
3905         uint8_t i, j;
3906
3907         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3908         if (vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3909                 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3910                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3911         } else {
3912                 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3913                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3914         }
3915
3916         /* Initialize User Priority to Traffic Class mapping */
3917         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3918                 tc = &dcb_config->tc_config[j];
3919                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3920         }
3921
3922         /* User Priority to Traffic Class mapping */
3923         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3924                 j = vmdq_tx_conf->dcb_tc[i];
3925                 tc = &dcb_config->tc_config[j];
3926                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3927                                                 (uint8_t)(1 << i);
3928         }
3929 }
3930
3931 static void
3932 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3933                 struct ixgbe_dcb_config *dcb_config)
3934 {
3935         struct rte_eth_dcb_rx_conf *rx_conf =
3936                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3937         struct ixgbe_dcb_tc_config *tc;
3938         uint8_t i, j;
3939
3940         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3941         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3942
3943         /* Initialize User Priority to Traffic Class mapping */
3944         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3945                 tc = &dcb_config->tc_config[j];
3946                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3947         }
3948
3949         /* User Priority to Traffic Class mapping */
3950         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3951                 j = rx_conf->dcb_tc[i];
3952                 tc = &dcb_config->tc_config[j];
3953                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3954                                                 (uint8_t)(1 << i);
3955         }
3956 }
3957
3958 static void
3959 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3960                 struct ixgbe_dcb_config *dcb_config)
3961 {
3962         struct rte_eth_dcb_tx_conf *tx_conf =
3963                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3964         struct ixgbe_dcb_tc_config *tc;
3965         uint8_t i, j;
3966
3967         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3968         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3969
3970         /* Initialize User Priority to Traffic Class mapping */
3971         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3972                 tc = &dcb_config->tc_config[j];
3973                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3974         }
3975
3976         /* User Priority to Traffic Class mapping */
3977         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3978                 j = tx_conf->dcb_tc[i];
3979                 tc = &dcb_config->tc_config[j];
3980                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3981                                                 (uint8_t)(1 << i);
3982         }
3983 }
3984
3985 /**
3986  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3987  * @dev: pointer to eth_dev structure
3988  * @dcb_config: pointer to ixgbe_dcb_config structure
3989  */
3990 static void
3991 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3992                        struct ixgbe_dcb_config *dcb_config)
3993 {
3994         uint32_t reg;
3995         uint32_t vlanctrl;
3996         uint8_t i;
3997         uint32_t q;
3998         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3999
4000         PMD_INIT_FUNC_TRACE();
4001         /*
4002          * Disable the arbiter before changing parameters
4003          * (always enable recycle mode; WSP)
4004          */
4005         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4006         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4007
4008         if (hw->mac.type != ixgbe_mac_82598EB) {
4009                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
4010                 if (dcb_config->num_tcs.pg_tcs == 4) {
4011                         if (dcb_config->vt_mode)
4012                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4013                                         IXGBE_MRQC_VMDQRT4TCEN;
4014                         else {
4015                                 /* whether the mode is DCB or DCB_RSS, just
4016                                  * set MRQE to RTRSSxTCEN. RSS itself is
4017                                  * controlled by the RSS_FIELD bits
4018                                  */
4019                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4020                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4021                                         IXGBE_MRQC_RTRSS4TCEN;
4022                         }
4023                 }
4024                 if (dcb_config->num_tcs.pg_tcs == 8) {
4025                         if (dcb_config->vt_mode)
4026                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4027                                         IXGBE_MRQC_VMDQRT8TCEN;
4028                         else {
4029                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4030                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4031                                         IXGBE_MRQC_RTRSS8TCEN;
4032                         }
4033                 }
4034
4035                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4036
4037                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4038                         /* Disable drop for all queues in VMDQ mode*/
4039                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4040                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4041                                                 (IXGBE_QDE_WRITE |
4042                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4043                 } else {
4044                         /* Enable drop for all queues in SRIOV mode */
4045                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4046                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4047                                                 (IXGBE_QDE_WRITE |
4048                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4049                                                  IXGBE_QDE_ENABLE));
4050                 }
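                /*
                 * Note that QDE is written indirectly: each write carries the
                 * queue index in the IDX field together with the WRITE bit,
                 * and the ENABLE bit selects whether that queue drops packets
                 * when no descriptors are available.
                 */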
4051         }
4052
4053         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4054         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4055         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4056         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4057
4058         /* VFTA - enable all vlan filters */
4059         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4060                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4061         }
4062
4063         /*
4064          * Configure Rx packet plane (recycle mode; WSP) and
4065          * enable arbiter
4066          */
4067         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4068         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4069 }
4070
4071 static void
4072 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4073                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4074 {
4075         switch (hw->mac.type) {
4076         case ixgbe_mac_82598EB:
4077                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4078                 break;
4079         case ixgbe_mac_82599EB:
4080         case ixgbe_mac_X540:
4081         case ixgbe_mac_X550:
4082         case ixgbe_mac_X550EM_x:
4083         case ixgbe_mac_X550EM_a:
4084                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4085                                                   tsa, map);
4086                 break;
4087         default:
4088                 break;
4089         }
4090 }
4091
4092 static void
4093 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4094                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4095 {
4096         switch (hw->mac.type) {
4097         case ixgbe_mac_82598EB:
4098                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4099                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4100                 break;
4101         case ixgbe_mac_82599EB:
4102         case ixgbe_mac_X540:
4103         case ixgbe_mac_X550:
4104         case ixgbe_mac_X550EM_x:
4105         case ixgbe_mac_X550EM_a:
4106                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4107                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4108                 break;
4109         default:
4110                 break;
4111         }
4112 }
4113
4114 #define DCB_RX_CONFIG  1
4115 #define DCB_TX_CONFIG  1
4116 #define DCB_TX_PB      1024
4117 /**
4118  * ixgbe_dcb_hw_configure - Enable DCB and configure general DCB
4119  * parameters, in both VT and non-VT mode
4120  * @dev: pointer to rte_eth_dev structure
4121  * @dcb_config: pointer to ixgbe_dcb_config structure
4122  */
4123 static int
4124 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4125                         struct ixgbe_dcb_config *dcb_config)
4126 {
4127         int     ret = 0;
4128         uint8_t i, pfc_en, nb_tcs;
4129         uint16_t pbsize, rx_buffer_size;
4130         uint8_t config_dcb_rx = 0;
4131         uint8_t config_dcb_tx = 0;
4132         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4133         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4134         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4135         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4136         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4137         struct ixgbe_dcb_tc_config *tc;
4138         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4139                 RTE_ETHER_CRC_LEN;
4140         struct ixgbe_hw *hw =
4141                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4142         struct ixgbe_bw_conf *bw_conf =
4143                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4144
4145         switch (dev->data->dev_conf.rxmode.mq_mode) {
4146         case RTE_ETH_MQ_RX_VMDQ_DCB:
4147                 dcb_config->vt_mode = true;
4148                 if (hw->mac.type != ixgbe_mac_82598EB) {
4149                         config_dcb_rx = DCB_RX_CONFIG;
4150                         /*
4151                          * get DCB and VT RX configuration parameters
4152                          * from rte_eth_conf
4153                          */
4154                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4155                         /*Configure general VMDQ and DCB RX parameters*/
4156                         ixgbe_vmdq_dcb_configure(dev);
4157                 }
4158                 break;
4159         case RTE_ETH_MQ_RX_DCB:
4160         case RTE_ETH_MQ_RX_DCB_RSS:
4161                 dcb_config->vt_mode = false;
4162                 config_dcb_rx = DCB_RX_CONFIG;
4163                 /* Get DCB RX configuration parameters from rte_eth_conf */
4164                 ixgbe_dcb_rx_config(dev, dcb_config);
4165                 /*Configure general DCB RX parameters*/
4166                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4167                 break;
4168         default:
4169                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4170                 break;
4171         }
4172         switch (dev->data->dev_conf.txmode.mq_mode) {
4173         case RTE_ETH_MQ_TX_VMDQ_DCB:
4174                 dcb_config->vt_mode = true;
4175                 config_dcb_tx = DCB_TX_CONFIG;
4176                 /* get DCB and VT TX configuration parameters
4177                  * from rte_eth_conf
4178                  */
4179                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4180                 /*Configure general VMDQ and DCB TX parameters*/
4181                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4182                 break;
4183
4184         case RTE_ETH_MQ_TX_DCB:
4185                 dcb_config->vt_mode = false;
4186                 config_dcb_tx = DCB_TX_CONFIG;
4187                 /*get DCB TX configuration parameters from rte_eth_conf*/
4188                 ixgbe_dcb_tx_config(dev, dcb_config);
4189                 /*Configure general DCB TX parameters*/
4190                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4191                 break;
4192         default:
4193                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4194                 break;
4195         }
4196
4197         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4198         /* Unpack map */
4199         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4200         if (nb_tcs == RTE_ETH_4_TCS) {
4201                 /* Avoid un-configured priority mapping to TC0 */
4202                 uint8_t j = 4;
4203                 uint8_t mask = 0xFF;
4204
4205                 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4206                         mask = (uint8_t)(mask & (~(1 << map[i])));
4207                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4208                         if ((mask & 0x1) && j < RTE_ETH_DCB_NUM_USER_PRIORITIES)
4209                                 map[j++] = i;
4210                         mask >>= 1;
4211                 }
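                /*
                 * For example, if the unpacked map is {0, 1, 2, 3, 0, 0, 0, 0},
                 * the mask ends up as 0xF0 and priorities 4-7 are remapped to
                 * the unused TC indices 4-7 instead of all landing on TC0.
                 */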
4212                 /* Re-configure 4 TCs BW */
4213                 for (i = 0; i < nb_tcs; i++) {
4214                         tc = &dcb_config->tc_config[i];
4215                         if (bw_conf->tc_num != nb_tcs)
4216                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4217                                         (uint8_t)(100 / nb_tcs);
4218                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4219                                                 (uint8_t)(100 / nb_tcs);
4220                 }
4221                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4222                         tc = &dcb_config->tc_config[i];
4223                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4224                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4225                 }
4226         } else {
4227                 /* Re-configure 8 TCs BW */
4228                 for (i = 0; i < nb_tcs; i++) {
4229                         tc = &dcb_config->tc_config[i];
4230                         if (bw_conf->tc_num != nb_tcs)
4231                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4232                                         (uint8_t)(100 / nb_tcs + (i & 1));
4233                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4234                                 (uint8_t)(100 / nb_tcs + (i & 1));
4235                 }
4236         }
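        /*
         * For example, with 8 TCs the loop above assigns 100 / 8 + (i & 1),
         * i.e. 12 or 13 percent alternately, so the eight values sum to 100;
         * with 4 TCs each class simply gets 25 percent.
         */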
4237
4238         switch (hw->mac.type) {
4239         case ixgbe_mac_X550:
4240         case ixgbe_mac_X550EM_x:
4241         case ixgbe_mac_X550EM_a:
4242                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4243                 break;
4244         default:
4245                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4246                 break;
4247         }
4248
4249         if (config_dcb_rx) {
4250                 /* Set RX buffer size */
4251                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4252                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4253
4254                 for (i = 0; i < nb_tcs; i++) {
4255                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4256                 }
4257                 /* zero alloc all unused TCs */
4258                 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
4259                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4260         }
4261         if (config_dcb_tx) {
4262                 /* Only an equally distributed Tx packet
4263                  * buffer strategy is supported.
4264                  */
4265                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4266                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4267
4268                 for (i = 0; i < nb_tcs; i++) {
4269                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4270                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4271                 }
4272                 /* Clear unused TCs, if any, to zero buffer size*/
4273                 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4274                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4275                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4276                 }
4277         }
4278
4279         /* Calculate traffic class credits */
4280         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4281                                 IXGBE_DCB_TX_CONFIG);
4282         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4283                                 IXGBE_DCB_RX_CONFIG);
4284
4285         if (config_dcb_rx) {
4286                 /* Unpack CEE standard containers */
4287                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4288                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4289                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4290                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4291                 /* Configure PG(ETS) RX */
4292                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4293         }
4294
4295         if (config_dcb_tx) {
4296                 /* Unpack CEE standard containers */
4297                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4298                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4299                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4300                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4301                 /* Configure PG(ETS) TX */
4302                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4303         }
4304
4305         /*Configure queue statistics registers*/
4306         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4307
4308         /* Check whether PFC support is requested */
4309         if (dev->data->dev_conf.dcb_capability_en & RTE_ETH_DCB_PFC_SUPPORT) {
4310                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4311                 for (i = 0; i < nb_tcs; i++) {
4312                         /*
4313                          * E.g. with a 512 KB Rx buffer and 8 TCs, pbsize is
4314                          * 64 KB, so high_water is 48 KB and low_water 16 KB.
4315                          */
4316                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4317                         hw->fc.low_water[i] = pbsize / 4;
4318                         /* Enable pfc for this TC */
4319                         tc = &dcb_config->tc_config[i];
4320                         tc->pfc = ixgbe_dcb_pfc_enabled;
4321                 }
4322                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4323                 if (dcb_config->num_tcs.pfc_tcs == RTE_ETH_4_TCS)
4324                         pfc_en &= 0x0F;
4325                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4326         }
4327
4328         return ret;
4329 }
4330
4331 /**
4332  * ixgbe_configure_dcb - Configure DCB hardware
4333  * @dev: pointer to rte_eth_dev
4334  */
4335 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4336 {
4337         struct ixgbe_dcb_config *dcb_cfg =
4338                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4339         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4340
4341         PMD_INIT_FUNC_TRACE();
4342
4343         /* check support mq_mode for DCB */
4344         if (dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_VMDQ_DCB &&
4345             dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB &&
4346             dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB_RSS)
4347                 return;
4348
4349         if (dev->data->nb_rx_queues > RTE_ETH_DCB_NUM_QUEUES)
4350                 return;
4351
4352         /* Configure DCB hardware */
4353         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4354 }
4355
4356 /*
4357  * VMDq is only supported on 10 GbE NICs.
4358  */
4359 static void
4360 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4361 {
4362         struct rte_eth_vmdq_rx_conf *cfg;
4363         struct ixgbe_hw *hw;
4364         enum rte_eth_nb_pools num_pools;
4365         uint32_t mrqc, vt_ctl, vlanctrl;
4366         uint32_t vmolr = 0;
4367         int i;
4368
4369         PMD_INIT_FUNC_TRACE();
4370         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4371         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4372         num_pools = cfg->nb_queue_pools;
4373
4374         ixgbe_rss_disable(dev);
4375
4376         /* MRQC: enable vmdq */
4377         mrqc = IXGBE_MRQC_VMDQEN;
4378         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4379
4380         /* PFVTCTL: turn on virtualisation and set the default pool */
4381         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4382         if (cfg->enable_default_pool)
4383                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4384         else
4385                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4386
4387         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4388
4389         for (i = 0; i < (int)num_pools; i++) {
4390                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4391                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4392         }
4393
4394         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4395         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4396         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4397         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4398
4399         /* VFTA - enable all vlan filters */
4400         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4401                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4402
4403         /* VFRE: pool enabling for receive - 64 */
4404         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4405         if (num_pools == RTE_ETH_64_POOLS)
4406                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4407
4408         /*
4409          * MPSAR - allow pools to read specific mac addresses
4410          * In this case, all pools should be able to read from mac addr 0
4411          */
4412         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4413         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4414
4415         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4416         for (i = 0; i < cfg->nb_pool_maps; i++) {
4417                 /* set vlan id in VF register and set the valid bit */
4418                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4419                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4420                 /*
4421                  * Put the allowed pools in the VFB register pair: the low
4422                  * dword (bits 0-31) covers pools 0-31 and the high dword
4423                  * covers pools 32-63.
4424                  */
4425                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4426                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4427                                         (cfg->pool_map[i].pools & UINT32_MAX));
4428                 else
4429                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4430                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4431
4432         }
4433
4434         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4435         if (cfg->enable_loop_back) {
4436                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4437                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4438                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4439         }
4440
4441         IXGBE_WRITE_FLUSH(hw);
4442 }
4443
4444 /*
4445  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4446  * @hw: pointer to hardware structure
4447  */
4448 static void
4449 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4450 {
4451         uint32_t reg;
4452         uint32_t q;
4453
4454         PMD_INIT_FUNC_TRACE();
4455         /*PF VF Transmit Enable*/
4456         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4457         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4458
4459         /* Disable the Tx desc arbiter so that MTQC can be changed */
4460         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4461         reg |= IXGBE_RTTDCS_ARBDIS;
4462         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4463
4464         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4465         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4466
4467         /* Disable drop for all queues */
4468         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4469                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4470                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4471
4472         /* Enable the Tx desc arbiter */
4473         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4474         reg &= ~IXGBE_RTTDCS_ARBDIS;
4475         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4476
4477         IXGBE_WRITE_FLUSH(hw);
4478 }
4479
4480 static int __rte_cold
4481 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4482 {
4483         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4484         uint64_t dma_addr;
4485         unsigned int i;
4486
4487         /* Initialize software ring entries */
4488         for (i = 0; i < rxq->nb_rx_desc; i++) {
4489                 volatile union ixgbe_adv_rx_desc *rxd;
4490                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4491
4492                 if (mbuf == NULL) {
4493                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4494                                      (unsigned) rxq->queue_id);
4495                         return -ENOMEM;
4496                 }
4497
4498                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4499                 mbuf->port = rxq->port_id;
4500
4501                 dma_addr =
4502                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4503                 rxd = &rxq->rx_ring[i];
4504                 rxd->read.hdr_addr = 0;
4505                 rxd->read.pkt_addr = dma_addr;
4506                 rxe[i].mbuf = mbuf;
4507         }
4508
4509         return 0;
4510 }
4511
4512 static int
4513 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4514 {
4515         struct ixgbe_hw *hw;
4516         uint32_t mrqc;
4517
4518         ixgbe_rss_configure(dev);
4519
4520         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4521
4522         /* MRQC: enable VF RSS */
4523         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4524         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4525         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4526         case RTE_ETH_64_POOLS:
4527                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4528                 break;
4529
4530         case RTE_ETH_32_POOLS:
4531                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4532                 break;
4533
4534         default:
4535                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4536                 return -EINVAL;
4537         }
4538
4539         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4540
4541         return 0;
4542 }
4543
4544 static int
4545 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4546 {
4547         struct ixgbe_hw *hw =
4548                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4549
4550         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4551         case RTE_ETH_64_POOLS:
4552                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4553                         IXGBE_MRQC_VMDQEN);
4554                 break;
4555
4556         case RTE_ETH_32_POOLS:
4557                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4558                         IXGBE_MRQC_VMDQRT4TCEN);
4559                 break;
4560
4561         case RTE_ETH_16_POOLS:
4562                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4563                         IXGBE_MRQC_VMDQRT8TCEN);
4564                 break;
4565         default:
4566                 PMD_INIT_LOG(ERR,
4567                         "invalid pool number in IOV mode");
4568                 break;
4569         }
4570         return 0;
4571 }
4572
4573 static int
4574 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4575 {
4576         struct ixgbe_hw *hw =
4577                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4578
4579         if (hw->mac.type == ixgbe_mac_82598EB)
4580                 return 0;
4581
4582         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4583                 /*
4584                  * SRIOV inactive scheme:
4585                  * any DCB/RSS or VMDq multi-queue setting without SRIOV
4586                  */
4587                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4588                 case RTE_ETH_MQ_RX_RSS:
4589                 case RTE_ETH_MQ_RX_DCB_RSS:
4590                 case RTE_ETH_MQ_RX_VMDQ_RSS:
4591                         ixgbe_rss_configure(dev);
4592                         break;
4593
4594                 case RTE_ETH_MQ_RX_VMDQ_DCB:
4595                         ixgbe_vmdq_dcb_configure(dev);
4596                         break;
4597
4598                 case RTE_ETH_MQ_RX_VMDQ_ONLY:
4599                         ixgbe_vmdq_rx_hw_configure(dev);
4600                         break;
4601
4602                 case RTE_ETH_MQ_RX_NONE:
4603                 default:
4604                         /* if mq_mode is none, disable rss mode.*/
4605                         ixgbe_rss_disable(dev);
4606                         break;
4607                 }
4608         } else {
4609                 /* SRIOV active scheme
4610                  * Support RSS together with SRIOV.
4611                  */
4612                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4613                 case RTE_ETH_MQ_RX_RSS:
4614                 case RTE_ETH_MQ_RX_VMDQ_RSS:
4615                         ixgbe_config_vf_rss(dev);
4616                         break;
4617                 case RTE_ETH_MQ_RX_VMDQ_DCB:
4618                 case RTE_ETH_MQ_RX_DCB:
4619                 /* In SRIOV, the configuration is the same as VMDq case */
4620                         ixgbe_vmdq_dcb_configure(dev);
4621                         break;
4622                 /* DCB/RSS together with SRIOV is not supported */
4623                 case RTE_ETH_MQ_RX_VMDQ_DCB_RSS:
4624                 case RTE_ETH_MQ_RX_DCB_RSS:
4625                         PMD_INIT_LOG(ERR,
4626                                 "Could not support DCB/RSS with VMDq & SRIOV");
4627                         return -1;
4628                 default:
4629                         ixgbe_config_vf_default(dev);
4630                         break;
4631                 }
4632         }
4633
4634         return 0;
4635 }
4636
4637 static int
4638 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4639 {
4640         struct ixgbe_hw *hw =
4641                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4642         uint32_t mtqc;
4643         uint32_t rttdcs;
4644
4645         if (hw->mac.type == ixgbe_mac_82598EB)
4646                 return 0;
4647
4648         /* disable arbiter before setting MTQC */
4649         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4650         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4651         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4652
4653         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4654                 /*
4655                  * SRIOV inactive scheme:
4656                  * any DCB or VMDq multi-queue setting without SRIOV
4657                  */
4658                 if (dev->data->dev_conf.txmode.mq_mode == RTE_ETH_MQ_TX_VMDQ_ONLY)
4659                         ixgbe_vmdq_tx_hw_configure(hw);
4660                 else {
4661                         mtqc = IXGBE_MTQC_64Q_1PB;
4662                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4663                 }
4664         } else {
4665                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4666
4667                 /*
4668                  * SRIOV active scheme
4669                  * FIXME: add support for DCB together with VMDq & SRIOV
4670                  */
4671                 case RTE_ETH_64_POOLS:
4672                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4673                         break;
4674                 case RTE_ETH_32_POOLS:
4675                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4676                         break;
4677                 case RTE_ETH_16_POOLS:
4678                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4679                                 IXGBE_MTQC_8TC_8TQ;
4680                         break;
4681                 default:
4682                         mtqc = IXGBE_MTQC_64Q_1PB;
4683                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4684                 }
4685                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4686         }
4687
4688         /* re-enable arbiter */
4689         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4690         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4691
4692         return 0;
4693 }
4694
4695 /**
4696  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4697  *
4698  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4699  * spec rev. 3.0 chapter 8.2.3.8.13.
4700  *
4701  * @pool Memory pool of the Rx queue
4702  */
4703 static inline uint32_t
4704 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4705 {
4706         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4707
4708         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4709         uint16_t maxdesc =
4710                 RTE_IPV4_MAX_PKT_LEN /
4711                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4712
4713         if (maxdesc >= 16)
4714                 return IXGBE_RSCCTL_MAXDESC_16;
4715         else if (maxdesc >= 8)
4716                 return IXGBE_RSCCTL_MAXDESC_8;
4717         else if (maxdesc >= 4)
4718                 return IXGBE_RSCCTL_MAXDESC_4;
4719         else
4720                 return IXGBE_RSCCTL_MAXDESC_1;
4721 }
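/*
 * For example, assuming the common 2 KB mbuf data room (2048 usable bytes
 * after the 128-byte headroom), maxdesc = 65535 / 2048 = 31, so
 * IXGBE_RSCCTL_MAXDESC_16 is returned.
 */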
4722
4723 /**
4724  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4725  * interrupt
4726  *
4727  * (Taken from FreeBSD tree)
4728  * (yes this is all very magic and confusing :)
4729  *
4730  * @dev port handle
4731  * @entry the register array entry
4732  * @vector the MSIX vector for this queue
4733  * @type RX/TX/MISC
4734  */
4735 static void
4736 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4737 {
4738         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4739         u32 ivar, index;
4740
4741         vector |= IXGBE_IVAR_ALLOC_VAL;
4742
4743         switch (hw->mac.type) {
4744
4745         case ixgbe_mac_82598EB:
4746                 if (type == -1)
4747                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4748                 else
4749                         entry += (type * 64);
4750                 index = (entry >> 2) & 0x1F;
4751                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4752                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4753                 ivar |= (vector << (8 * (entry & 0x3)));
4754                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4755                 break;
4756
4757         case ixgbe_mac_82599EB:
4758         case ixgbe_mac_X540:
4759                 if (type == -1) { /* MISC IVAR */
4760                         index = (entry & 1) * 8;
4761                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4762                         ivar &= ~(0xFF << index);
4763                         ivar |= (vector << index);
4764                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4765                 } else {        /* RX/TX IVARS */
4766                         index = (16 * (entry & 1)) + (8 * type);
4767                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4768                         ivar &= ~(0xFF << index);
4769                         ivar |= (vector << index);
4770                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4771                 }
4772
4773                 break;
4774
4775         default:
4776                 break;
4777         }
4778 }
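/*
 * For example, on an 82599, mapping RX queue entry 5 (type 0) above uses
 * index = 16 * (5 & 1) + 8 * 0 = 16, so the vector (with IXGBE_IVAR_ALLOC_VAL
 * set) lands in bits 23:16 of IVAR(5 >> 1) = IVAR(2).
 */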
4779
4780 void __rte_cold
4781 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4782 {
4783         uint16_t i, rx_using_sse;
4784         struct ixgbe_adapter *adapter = dev->data->dev_private;
4785
4786         /*
4787          * To allow Vector Rx, a few configuration conditions must be met
4788          * and Rx Bulk Allocation must be allowed.
4789          */
4790         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4791             !adapter->rx_bulk_alloc_allowed ||
4792                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4793                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4794                                     "preconditions",
4795                              dev->data->port_id);
4796
4797                 adapter->rx_vec_allowed = false;
4798         }
4799
4800         /*
4801          * Initialize the appropriate LRO callback.
4802          *
4803          * If all queues satisfy the bulk allocation preconditions
4804          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4805          * Otherwise use a single allocation version.
4806          */
4807         if (dev->data->lro) {
4808                 if (adapter->rx_bulk_alloc_allowed) {
4809                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4810                                            "allocation version");
4811                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4812                 } else {
4813                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4814                                            "allocation version");
4815                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4816                 }
4817         } else if (dev->data->scattered_rx) {
4818                 /*
4819                  * Set the non-LRO scattered callback: there are Vector and
4820                  * single allocation versions.
4821                  */
4822                 if (adapter->rx_vec_allowed) {
4823                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4824                                             "callback (port=%d).",
4825                                      dev->data->port_id);
4826
4827                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4828                 } else if (adapter->rx_bulk_alloc_allowed) {
4829                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4830                                            "allocation callback (port=%d).",
4831                                      dev->data->port_id);
4832                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4833                 } else {
4834                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4835                                             "single allocation) "
4836                                             "Scattered Rx callback "
4837                                             "(port=%d).",
4838                                      dev->data->port_id);
4839
4840                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4841                 }
4842         /*
4843          * Below we set "simple" callbacks according to port/queues parameters.
4844          * If parameters allow we are going to choose between the following
4845          * callbacks:
4846          *    - Vector
4847          *    - Bulk Allocation
4848          *    - Single buffer allocation (the simplest one)
4849          */
4850         } else if (adapter->rx_vec_allowed) {
4851                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4852                                     "burst size is no less than %d (port=%d).",
4853                              RTE_IXGBE_DESCS_PER_LOOP,
4854                              dev->data->port_id);
4855
4856                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4857         } else if (adapter->rx_bulk_alloc_allowed) {
4858                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4859                                     "satisfied. Rx Burst Bulk Alloc function "
4860                                     "will be used on port=%d.",
4861                              dev->data->port_id);
4862
4863                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4864         } else {
4865                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4866                                     "satisfied, or Scattered Rx is requested "
4867                                     "(port=%d).",
4868                              dev->data->port_id);
4869
4870                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4871         }
4872
4873         /* Propagate information about RX function choice through all queues. */
4874
4875         rx_using_sse =
4876                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4877                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4878
4879         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4880                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4881
4882                 rxq->rx_using_sse = rx_using_sse;
4883 #ifdef RTE_LIB_SECURITY
4884                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4885                                 RTE_ETH_RX_OFFLOAD_SECURITY);
4886 #endif
4887         }
4888 }
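/*
 * In short, the Rx burst callback above is chosen in this order of
 * precedence: LRO (bulk or single allocation), then scattered Rx (vector,
 * bulk or single allocation), then vector Rx, then bulk allocation, and
 * finally the plain single-buffer ixgbe_recv_pkts() path.
 */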
4889
4890 /**
4891  * ixgbe_set_rsc - configure RSC related port HW registers
4892  *
4893  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4894  * of 82599 Spec (x540 configuration is virtually the same).
4895  *
4896  * @dev port handle
4897  *
4898  * Returns 0 in case of success or a non-zero error code
4899  */
4900 static int
4901 ixgbe_set_rsc(struct rte_eth_dev *dev)
4902 {
4903         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4904         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4905         struct rte_eth_dev_info dev_info = { 0 };
4906         bool rsc_capable = false;
4907         uint16_t i;
4908         uint32_t rdrxctl;
4909         uint32_t rfctl;
4910
4911         /* Sanity check */
4912         dev->dev_ops->dev_infos_get(dev, &dev_info);
4913         if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
4914                 rsc_capable = true;
4915
4916         if (!rsc_capable && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4917                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4918                                    "support it");
4919                 return -EINVAL;
4920         }
4921
4922         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4923
4924         if ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) &&
4925              (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4926                 /*
4927                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4928                  * RSC configuration requires HW CRC stripping to be
4929                  * enabled. If the user requested both HW CRC stripping
4930                  * off and RSC on, return an error.
4931                  */
4932                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4933                                     "is disabled");
4934                 return -EINVAL;
4935         }
4936
4937         /* RFCTL configuration  */
4938         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4939         if ((rsc_capable) && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4940                 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4941         else
4942                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4943         /* disable NFS filtering */
4944         rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4945         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4946
4947         /* If LRO hasn't been requested - we are done here. */
4948         if (!(rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4949                 return 0;
4950
4951         /* Set RDRXCTL.RSCACKC bit */
4952         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4953         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4954         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4955
4956         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4957         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4958                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4959                 uint32_t srrctl =
4960                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4961                 uint32_t rscctl =
4962                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4963                 uint32_t psrtype =
4964                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4965                 uint32_t eitr =
4966                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4967
4968                 /*
4969                  * ixgbe PMD doesn't support header-split at the moment.
4970                  *
4971                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4972                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4973                  * should be configured even if header split is not
4974                  * enabled. We configure it to 128 bytes, following the
4975                  * recommendation in the spec.
4976                  */
4977                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4978                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4979                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4980
4981                 /*
4982                  * TODO: Consider setting the Receive Descriptor Minimum
4983                  * Threshold Size for the RSC case. This is not an obviously
4984                  * beneficial option, but one worth considering.
4985                  */
4986
4987                 rscctl |= IXGBE_RSCCTL_RSCEN;
4988                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4989                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4990
4991                 /*
4992                  * RSC: Set ITR interval corresponding to 2K ints/s.
4993                  *
4994                  * Full-sized RSC aggregations on a 10 Gb/s link arrive
4995                  * at a rate of about 20K aggregations/s.
4996                  *
4997                  * At a 2K ints/s rate, only about 10% of the aggregations
4998                  * are closed due to interrupt timer expiration when
4999                  * streaming at wire speed.
5000                  *
5001                  * For a sparse streaming case this setting yields at
5002                  * most 500 us of latency for a single RSC aggregation.
5003                  */
5004                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5005                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5006                 eitr |= IXGBE_EITR_CNT_WDIS;
5007
5008                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5009                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5010                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5011                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5012
5013                 /*
5014                  * RSC requires the mapping of the queue to the
5015                  * interrupt vector.
5016                  */
5017                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5018         }
5019
5020         dev->data->lro = 1;
5021
5022         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5023
5024         return 0;
5025 }
5026
5027 /*
5028  * Initializes Receive Unit.
5029  */
5030 int __rte_cold
5031 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5032 {
5033         struct ixgbe_hw     *hw;
5034         struct ixgbe_rx_queue *rxq;
5035         uint64_t bus_addr;
5036         uint32_t rxctrl;
5037         uint32_t fctrl;
5038         uint32_t hlreg0;
5039         uint32_t maxfrs;
5040         uint32_t srrctl;
5041         uint32_t rdrxctl;
5042         uint32_t rxcsum;
5043         uint16_t buf_size;
5044         uint16_t i;
5045         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5046         uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5047         int rc;
5048
5049         PMD_INIT_FUNC_TRACE();
5050         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5051
5052         /*
5053          * Make sure receives are disabled while setting
5054          * up the RX context (registers, descriptor rings, etc.).
5055          */
5056         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5057         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5058
5059         /* Enable receipt of broadcast frames */
5060         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5061         fctrl |= IXGBE_FCTRL_BAM;
5062         fctrl |= IXGBE_FCTRL_DPF;
5063         fctrl |= IXGBE_FCTRL_PMCF;
5064         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5065
5066         /*
5067          * Configure CRC stripping, if any.
5068          */
5069         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5070         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5071                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5072         else
5073                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5074
5075         /*
5076          * Configure jumbo frame support, if any.
5077          */
5078         if (dev->data->mtu > RTE_ETHER_MTU) {
5079                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5080                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5081                 maxfrs &= 0x0000FFFF;
5082                 maxfrs |= (frame_size << 16);
5083                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5084         } else
5085                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5086
5087         /*
5088          * If loopback mode is configured, set LPBK bit.
5089          */
5090         if (dev->data->dev_conf.lpbk_mode != 0) {
5091                 rc = ixgbe_check_supported_loopback_mode(dev);
5092                 if (rc < 0) {
5093                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5094                         return rc;
5095                 }
5096                 hlreg0 |= IXGBE_HLREG0_LPBK;
5097         } else {
5098                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5099         }
5100
5101         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5102
5103         /*
5104          * Assume no header split and no VLAN stripping support
5105          * on any Rx queue to begin with.
5106          */
5107         rx_conf->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5108         /* Setup RX queues */
5109         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5110                 rxq = dev->data->rx_queues[i];
5111
5112                 /*
5113                  * Reset crc_len in case it was changed after queue setup by a
5114                  * call to configure.
5115                  */
5116                 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5117                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5118                 else
5119                         rxq->crc_len = 0;
5120
5121                 /* Setup the Base and Length of the Rx Descriptor Rings */
5122                 bus_addr = rxq->rx_ring_phys_addr;
5123                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5124                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5125                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5126                                 (uint32_t)(bus_addr >> 32));
5127                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5128                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5129                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5130                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5131
5132                 /* Configure the SRRCTL register */
5133                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5134
5135                 /* Set whether packets are dropped when no descriptors are available */
5136                 if (rxq->drop_en)
5137                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5138
5139                 /*
5140                  * Configure the RX buffer size in the BSIZEPACKET field of
5141                  * the SRRCTL register of the queue.
5142                  * The value is in 1 KB resolution. Valid values can be from
5143                  * 1 KB to 16 KB.
5144                  */
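                /*
                 * e.g. with the default 2176-byte mbuf data room and 128-byte
                 * headroom, buf_size is 2048, so BSIZEPACKET is programmed
                 * as 2 (2 KB).
                 */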
5145                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5146                         RTE_PKTMBUF_HEADROOM);
5147                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5148                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5149
5150                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5151
5152                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5153                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5154
5155                 /* Add the length of two VLAN tags to support dual VLAN (QinQ) */
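                /*
                 * e.g. with a 1500-byte MTU, frame_size is 1518 bytes (MTU plus
                 * L2 header and CRC), and scatter is forced unless the buffer
                 * can hold 1526 bytes (two extra 4-byte VLAN tags).
                 */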
5156                 if (frame_size + 2 * RTE_VLAN_HLEN > buf_size)
5157                         dev->data->scattered_rx = 1;
5158                 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5159                         rx_conf->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5160         }
5161
5162         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
5163                 dev->data->scattered_rx = 1;
5164
5165         /*
5166          * Configure the device for operation with multiple RX queues.
5167          */
5168         ixgbe_dev_mq_rx_configure(dev);
5169
5170         /*
5171          * Setup the Checksum Register.
5172          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5173          * Enable IP/L4 checksum computation by hardware if requested to do so.
5174          */
5175         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5176         rxcsum |= IXGBE_RXCSUM_PCSD;
5177         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
5178                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5179         else
5180                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5181
5182         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5183
5184         if (hw->mac.type == ixgbe_mac_82599EB ||
5185             hw->mac.type == ixgbe_mac_X540) {
5186                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5187                 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5188                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5189                 else
5190                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
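                /*
                 * RSCFRSTSIZE is not used by the driver and is cleared here,
                 * as hardware expects the field to be written as zero.
                 */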
5191                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5192                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5193         }
5194
5195         rc = ixgbe_set_rsc(dev);
5196         if (rc)
5197                 return rc;
5198
5199         ixgbe_set_rx_function(dev);
5200
5201         return 0;
5202 }
5203
5204 /*
5205  * Initializes Transmit Unit.
5206  */
5207 void __rte_cold
5208 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5209 {
5210         struct ixgbe_hw     *hw;
5211         struct ixgbe_tx_queue *txq;
5212         uint64_t bus_addr;
5213         uint32_t hlreg0;
5214         uint32_t txctrl;
5215         uint16_t i;
5216
5217         PMD_INIT_FUNC_TRACE();
5218         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5219
5220         /* Enable TX CRC (checksum offload requirement) and hw padding
5221          * (TSO requirement)
5222          */
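        /*
         * TXCRCEN makes hardware append the Ethernet FCS, and TXPADEN pads
         * frames shorter than the 64-byte minimum before transmission.
         */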
5223         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5224         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5225         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5226
5227         /* Setup the Base and Length of the Tx Descriptor Rings */
5228         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5229                 txq = dev->data->tx_queues[i];
5230
5231                 bus_addr = txq->tx_ring_phys_addr;
5232                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5233                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5234                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5235                                 (uint32_t)(bus_addr >> 32));
5236                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5237                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5238                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5239                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5240                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5241
5242                 /*
5243                  * Disable Tx Head Writeback RO bit, since this hoses
5244                  * bookkeeping if things aren't delivered in order.
5245                  */
5246                 switch (hw->mac.type) {
5247                 case ixgbe_mac_82598EB:
5248                         txctrl = IXGBE_READ_REG(hw,
5249                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5250                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5251                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5252                                         txctrl);
5253                         break;
5254
5255                 case ixgbe_mac_82599EB:
5256                 case ixgbe_mac_X540:
5257                 case ixgbe_mac_X550:
5258                 case ixgbe_mac_X550EM_x:
5259                 case ixgbe_mac_X550EM_a:
5260                 default:
5261                         txctrl = IXGBE_READ_REG(hw,
5262                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5263                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5264                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5265                                         txctrl);
5266                         break;
5267                 }
5268         }
5269
5270         /* Configure the device for operation with multiple TX queues. */
5271         ixgbe_dev_mq_tx_configure(dev);
5272 }
5273
5274 /*
5275  * Check if requested loopback mode is supported
5276  */
5277 int
5278 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5279 {
5280         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5281
5282         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5283                 if (hw->mac.type == ixgbe_mac_82599EB ||
5284                      hw->mac.type == ixgbe_mac_X540 ||
5285                      hw->mac.type == ixgbe_mac_X550 ||
5286                      hw->mac.type == ixgbe_mac_X550EM_x ||
5287                      hw->mac.type == ixgbe_mac_X550EM_a)
5288                         return 0;
5289
5290         return -ENOTSUP;
5291 }
5292
5293 /*
5294  * Set up link for 82599 loopback mode Tx->Rx.
5295  */
5296 static inline void __rte_cold
5297 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5298 {
5299         PMD_INIT_FUNC_TRACE();
5300
5301         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5302                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5303                                 IXGBE_SUCCESS) {
5304                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5305                         /* ignore error */
5306                         return;
5307                 }
5308         }
5309
5310         /* Restart link */
5311         IXGBE_WRITE_REG(hw,
5312                         IXGBE_AUTOC,
5313                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5314         ixgbe_reset_pipeline_82599(hw);
5315
5316         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5317         msec_delay(50);
5318 }
5319
5320
5321 /*
5322  * Start Transmit and Receive Units.
5323  */
5324 int __rte_cold
5325 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5326 {
5327         struct ixgbe_hw     *hw;
5328         struct ixgbe_tx_queue *txq;
5329         struct ixgbe_rx_queue *rxq;
5330         uint32_t txdctl;
5331         uint32_t dmatxctl;
5332         uint32_t rxctrl;
5333         uint16_t i;
5334         int ret = 0;
5335
5336         PMD_INIT_FUNC_TRACE();
5337         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5338
5339         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5340                 txq = dev->data->tx_queues[i];
5341                 /* Setup Transmit Threshold Registers */
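                /*
                 * TXDCTL packs the prefetch, host and write-back thresholds
                 * into bits 6:0, 14:8 and 22:16 respectively, hence the 0x7F
                 * masks and the 8/16-bit shifts below.
                 */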
5342                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5343                 txdctl |= txq->pthresh & 0x7F;
5344                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5345                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5346                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5347         }
5348
5349         if (hw->mac.type != ixgbe_mac_82598EB) {
5350                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5351                 dmatxctl |= IXGBE_DMATXCTL_TE;
5352                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5353         }
5354
5355         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5356                 txq = dev->data->tx_queues[i];
5357                 if (!txq->tx_deferred_start) {
5358                         ret = ixgbe_dev_tx_queue_start(dev, i);
5359                         if (ret < 0)
5360                                 return ret;
5361                 }
5362         }
5363
5364         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5365                 rxq = dev->data->rx_queues[i];
5366                 if (!rxq->rx_deferred_start) {
5367                         ret = ixgbe_dev_rx_queue_start(dev, i);
5368                         if (ret < 0)
5369                                 return ret;
5370                 }
5371         }
5372
5373         /* Enable Receive engine */
5374         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5375         if (hw->mac.type == ixgbe_mac_82598EB)
5376                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5377         rxctrl |= IXGBE_RXCTRL_RXEN;
5378         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5379
5380         /* If loopback mode is enabled, set up the link accordingly */
5381         if (dev->data->dev_conf.lpbk_mode != 0) {
5382                 if (hw->mac.type == ixgbe_mac_82599EB)
5383                         ixgbe_setup_loopback_link_82599(hw);
5384                 else if (hw->mac.type == ixgbe_mac_X540 ||
5385                      hw->mac.type == ixgbe_mac_X550 ||
5386                      hw->mac.type == ixgbe_mac_X550EM_x ||
5387                      hw->mac.type == ixgbe_mac_X550EM_a)
5388                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5389         }
5390
5391 #ifdef RTE_LIB_SECURITY
5392         if ((dev->data->dev_conf.rxmode.offloads &
5393                         RTE_ETH_RX_OFFLOAD_SECURITY) ||
5394                 (dev->data->dev_conf.txmode.offloads &
5395                         RTE_ETH_TX_OFFLOAD_SECURITY)) {
5396                 ret = ixgbe_crypto_enable_ipsec(dev);
5397                 if (ret != 0) {
5398                         PMD_DRV_LOG(ERR,
5399                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5400                                     ret);
5401                         return ret;
5402                 }
5403         }
5404 #endif
5405
5406         return 0;
5407 }
5408
5409 /*
5410  * Start Receive Units for specified queue.
5411  */
5412 int __rte_cold
5413 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5414 {
5415         struct ixgbe_hw     *hw;
5416         struct ixgbe_rx_queue *rxq;
5417         uint32_t rxdctl;
5418         int poll_ms;
5419
5420         PMD_INIT_FUNC_TRACE();
5421         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5422
5423         rxq = dev->data->rx_queues[rx_queue_id];
5424
5425         /* Allocate buffers for descriptor rings */
5426         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5427                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5428                              rx_queue_id);
5429                 return -1;
5430         }
5431         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5432         rxdctl |= IXGBE_RXDCTL_ENABLE;
5433         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5434
5435         /* Wait until RX Enable ready */
5436         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5437         do {
5438                 rte_delay_ms(1);
5439                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5440         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5441         if (!poll_ms)
5442                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5443         rte_wmb();
5444         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5445         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5446         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5447
5448         return 0;
5449 }
5450
5451 /*
5452  * Stop Receive Units for specified queue.
5453  */
5454 int __rte_cold
5455 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5456 {
5457         struct ixgbe_hw     *hw;
5458         struct ixgbe_adapter *adapter = dev->data->dev_private;
5459         struct ixgbe_rx_queue *rxq;
5460         uint32_t rxdctl;
5461         int poll_ms;
5462
5463         PMD_INIT_FUNC_TRACE();
5464         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5465
5466         rxq = dev->data->rx_queues[rx_queue_id];
5467
5468         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5469         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5470         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5471
5472         /* Wait until RX Enable bit clear */
5473         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5474         do {
5475                 rte_delay_ms(1);
5476                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5477         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5478         if (!poll_ms)
5479                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5480
5481         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5482
5483         ixgbe_rx_queue_release_mbufs(rxq);
5484         ixgbe_reset_rx_queue(adapter, rxq);
5485         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5486
5487         return 0;
5488 }
5489
5490
5491 /*
5492  * Start Transmit Units for specified queue.
5493  */
5494 int __rte_cold
5495 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5496 {
5497         struct ixgbe_hw     *hw;
5498         struct ixgbe_tx_queue *txq;
5499         uint32_t txdctl;
5500         int poll_ms;
5501
5502         PMD_INIT_FUNC_TRACE();
5503         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5504
5505         txq = dev->data->tx_queues[tx_queue_id];
5506         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5507         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5508         txdctl |= IXGBE_TXDCTL_ENABLE;
5509         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5510
5511         /* Wait until TX Enable ready */
5512         if (hw->mac.type == ixgbe_mac_82599EB) {
5513                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5514                 do {
5515                         rte_delay_ms(1);
5516                         txdctl = IXGBE_READ_REG(hw,
5517                                 IXGBE_TXDCTL(txq->reg_idx));
5518                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5519                 if (!poll_ms)
5520                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5521                                 tx_queue_id);
5522         }
5523         rte_wmb();
5524         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5525         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5526
5527         return 0;
5528 }
5529
5530 /*
5531  * Stop Transmit Units for specified queue.
5532  */
5533 int __rte_cold
5534 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5535 {
5536         struct ixgbe_hw     *hw;
5537         struct ixgbe_tx_queue *txq;
5538         uint32_t txdctl;
5539         uint32_t txtdh, txtdt;
5540         int poll_ms;
5541
5542         PMD_INIT_FUNC_TRACE();
5543         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5544
5545         txq = dev->data->tx_queues[tx_queue_id];
5546
5547         /* Wait until TX queue is empty */
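        /*
         * The queue is considered drained once the head pointer (TDH) has
         * caught up with the tail pointer (TDT).
         */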
5548         if (hw->mac.type == ixgbe_mac_82599EB) {
5549                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5550                 do {
5551                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5552                         txtdh = IXGBE_READ_REG(hw,
5553                                                IXGBE_TDH(txq->reg_idx));
5554                         txtdt = IXGBE_READ_REG(hw,
5555                                                IXGBE_TDT(txq->reg_idx));
5556                 } while (--poll_ms && (txtdh != txtdt));
5557                 if (!poll_ms)
5558                         PMD_INIT_LOG(ERR,
5559                                 "Tx Queue %d is not empty when stopping.",
5560                                 tx_queue_id);
5561         }
5562
5563         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5564         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5565         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5566
5567         /* Wait until TX Enable bit clear */
5568         if (hw->mac.type == ixgbe_mac_82599EB) {
5569                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5570                 do {
5571                         rte_delay_ms(1);
5572                         txdctl = IXGBE_READ_REG(hw,
5573                                                 IXGBE_TXDCTL(txq->reg_idx));
5574                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5575                 if (!poll_ms)
5576                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5577                                 tx_queue_id);
5578         }
5579
5580         if (txq->ops != NULL) {
5581                 txq->ops->release_mbufs(txq);
5582                 txq->ops->reset(txq);
5583         }
5584         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5585
5586         return 0;
5587 }
5588
5589 void
5590 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5591         struct rte_eth_rxq_info *qinfo)
5592 {
5593         struct ixgbe_rx_queue *rxq;
5594
5595         rxq = dev->data->rx_queues[queue_id];
5596
5597         qinfo->mp = rxq->mb_pool;
5598         qinfo->scattered_rx = dev->data->scattered_rx;
5599         qinfo->nb_desc = rxq->nb_rx_desc;
5600
5601         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5602         qinfo->conf.rx_drop_en = rxq->drop_en;
5603         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5604         qinfo->conf.offloads = rxq->offloads;
5605 }
5606
5607 void
5608 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5609         struct rte_eth_txq_info *qinfo)
5610 {
5611         struct ixgbe_tx_queue *txq;
5612
5613         txq = dev->data->tx_queues[queue_id];
5614
5615         qinfo->nb_desc = txq->nb_tx_desc;
5616
5617         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5618         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5619         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5620
5621         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5622         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5623         qinfo->conf.offloads = txq->offloads;
5624         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5625 }
5626
5627 /*
5628  * [VF] Initializes Receive Unit.
5629  */
5630 int __rte_cold
5631 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5632 {
5633         struct ixgbe_hw     *hw;
5634         struct ixgbe_rx_queue *rxq;
5635         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5636         uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5637         uint64_t bus_addr;
5638         uint32_t srrctl, psrtype = 0;
5639         uint16_t buf_size;
5640         uint16_t i;
5641         int ret;
5642
5643         PMD_INIT_FUNC_TRACE();
5644         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5645
5646         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5647                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5648                         "it should be a power of 2");
5649                 return -1;
5650         }
5651
5652         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5653                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5654                         "it should be less than or equal to %d",
5655                         hw->mac.max_rx_queues);
5656                 return -1;
5657         }
5658
5659         /*
5660          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5661          * disables VF packet receipt if the PF MTU is > 1500.
5662          * This is done to deal with an 82599 limitation that forces
5663          * the PF and all VFs to share the same MTU.
5664          * The PF driver then re-enables VF packet receipt when the
5665          * VF driver issues an IXGBE_VF_SET_LPE request.
5666          * In the meantime, the VF device cannot be used, even if the VF driver
5667          * and the Guest VM network stack are ready to accept packets with a
5668          * size up to the PF MTU.
5669          * As a workaround to this PF behaviour, always call
5670          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5671          * VF packet reception works in all cases.
5672          */
5673         if (ixgbevf_rlpml_set_vf(hw, frame_size) != 0)
5674                 PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
5675                              frame_size);
5676
5677         /*
5678          * Assume no header split and no VLAN strip support
5679          * on any Rx queue first .
5680          * on any Rx queue first.
5681         rxmode->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5682         /* Setup RX queues */
5683         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5684                 rxq = dev->data->rx_queues[i];
5685
5686                 /* Allocate buffers for descriptor rings */
5687                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5688                 if (ret)
5689                         return ret;
5690
5691                 /* Setup the Base and Length of the Rx Descriptor Rings */
5692                 bus_addr = rxq->rx_ring_phys_addr;
5693
5694                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5695                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5696                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5697                                 (uint32_t)(bus_addr >> 32));
5698                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5699                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5700                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5701                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5702
5703
5704                 /* Configure the SRRCTL register */
5705                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5706
5707                 /* Set whether packets are dropped when no descriptors are available */
5708                 if (rxq->drop_en)
5709                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5710
5711                 /*
5712                  * Configure the RX buffer size in the BSIZEPACKET field of
5713                  * the SRRCTL register of the queue.
5714                  * The value is in 1 KB resolution. Valid values can be from
5715                  * 1 KB to 16 KB.
5716                  */
5717                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5718                         RTE_PKTMBUF_HEADROOM);
5719                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5720                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5721
5722                 /*
5723                  * VF modification to write virtual function SRRCTL register
5724                  */
5725                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5726
5727                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5728                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5729
5730                 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ||
5731                     /* Add the length of two VLAN tags to support dual VLAN (QinQ) */
5732                     (frame_size + 2 * RTE_VLAN_HLEN) > buf_size) {
5733                         if (!dev->data->scattered_rx)
5734                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5735                         dev->data->scattered_rx = 1;
5736                 }
5737
5738                 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5739                         rxmode->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5740         }
5741
5742         /* Set RQPL for VF RSS according to the max number of Rx queues */
5743         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5744                 IXGBE_PSRTYPE_RQPL_SHIFT;
5745         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5746
5747         ixgbe_set_rx_function(dev);
5748
5749         return 0;
5750 }
5751
5752 /*
5753  * [VF] Initializes Transmit Unit.
5754  */
5755 void __rte_cold
5756 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5757 {
5758         struct ixgbe_hw     *hw;
5759         struct ixgbe_tx_queue *txq;
5760         uint64_t bus_addr;
5761         uint32_t txctrl;
5762         uint16_t i;
5763
5764         PMD_INIT_FUNC_TRACE();
5765         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5766
5767         /* Setup the Base and Length of the Tx Descriptor Rings */
5768         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5769                 txq = dev->data->tx_queues[i];
5770                 bus_addr = txq->tx_ring_phys_addr;
5771                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5772                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5773                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5774                                 (uint32_t)(bus_addr >> 32));
5775                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5776                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5777                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5778                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5779                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5780
5781                 /*
5782                  * Disable Tx Head Writeback RO bit, since this hoses
5783                  * bookkeeping if things aren't delivered in order.
5784                  */
5785                 txctrl = IXGBE_READ_REG(hw,
5786                                 IXGBE_VFDCA_TXCTRL(i));
5787                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5788                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5789                                 txctrl);
5790         }
5791 }
5792
5793 /*
5794  * [VF] Start Transmit and Receive Units.
5795  */
5796 void __rte_cold
5797 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5798 {
5799         struct ixgbe_hw     *hw;
5800         struct ixgbe_tx_queue *txq;
5801         struct ixgbe_rx_queue *rxq;
5802         uint32_t txdctl;
5803         uint32_t rxdctl;
5804         uint16_t i;
5805         int poll_ms;
5806
5807         PMD_INIT_FUNC_TRACE();
5808         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5809
5810         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5811                 txq = dev->data->tx_queues[i];
5812                 /* Setup Transmit Threshold Registers */
5813                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5814                 txdctl |= txq->pthresh & 0x7F;
5815                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5816                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5817                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5818         }
5819
5820         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5821
5822                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5823                 txdctl |= IXGBE_TXDCTL_ENABLE;
5824                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5825
5826                 poll_ms = 10;
5827                 /* Wait until TX Enable ready */
5828                 do {
5829                         rte_delay_ms(1);
5830                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5831                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5832                 if (!poll_ms)
5833                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5834         }
5835         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5836
5837                 rxq = dev->data->rx_queues[i];
5838
5839                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5840                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5841                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5842
5843                 /* Wait until RX Enable ready */
5844                 poll_ms = 10;
5845                 do {
5846                         rte_delay_ms(1);
5847                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5848                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5849                 if (!poll_ms)
5850                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5851                 rte_wmb();
5852                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5853
5854         }
5855 }
5856
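/*
 * Copy an rte_flow RSS action into driver-private storage so that the key
 * and queue list remain valid after the flow that supplied them is gone.
 * Returns -EINVAL if either array does not fit in the fixed-size buffers.
 */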
5857 int
5858 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5859                     const struct rte_flow_action_rss *in)
5860 {
5861         if (in->key_len > RTE_DIM(out->key) ||
5862             in->queue_num > RTE_DIM(out->queue))
5863                 return -EINVAL;
5864         out->conf = (struct rte_flow_action_rss){
5865                 .func = in->func,
5866                 .level = in->level,
5867                 .types = in->types,
5868                 .key_len = in->key_len,
5869                 .queue_num = in->queue_num,
5870                 .key = memcpy(out->key, in->key, in->key_len),
5871                 .queue = memcpy(out->queue, in->queue,
5872                                 sizeof(*in->queue) * in->queue_num),
5873         };
5874         return 0;
5875 }
5876
5877 int
5878 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5879                       const struct rte_flow_action_rss *with)
5880 {
5881         return (comp->func == with->func &&
5882                 comp->level == with->level &&
5883                 comp->types == with->types &&
5884                 comp->key_len == with->key_len &&
5885                 comp->queue_num == with->queue_num &&
5886                 !memcmp(comp->key, with->key, with->key_len) &&
5887                 !memcmp(comp->queue, with->queue,
5888                         sizeof(*with->queue) * with->queue_num));
5889 }
5890
5891 int
5892 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5893                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5894 {
5895         struct ixgbe_hw *hw;
5896         uint32_t reta;
5897         uint16_t i;
5898         uint16_t j;
5899         uint16_t sp_reta_size;
5900         uint32_t reta_reg;
5901         struct rte_eth_rss_conf rss_conf = {
5902                 .rss_key = conf->conf.key_len ?
5903                         (void *)(uintptr_t)conf->conf.key : NULL,
5904                 .rss_key_len = conf->conf.key_len,
5905                 .rss_hf = conf->conf.types,
5906         };
5907         struct ixgbe_filter_info *filter_info =
5908                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5909
5910         PMD_INIT_FUNC_TRACE();
5911         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5912
5913         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5914
5915         if (!add) {
5916                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5917                                           &conf->conf)) {
5918                         ixgbe_rss_disable(dev);
5919                         memset(&filter_info->rss_info, 0,
5920                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5921                         return 0;
5922                 }
5923                 return -EINVAL;
5924         }
5925
5926         if (filter_info->rss_info.conf.queue_num)
5927                 return -EINVAL;
5928         /* Fill in redirection table
5929          * The byte-swap is needed because NIC registers are in
5930          * little-endian order.
5931          */
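        /*
         * Four 8-bit queue indices are packed into each 32-bit RETA register;
         * a register write is issued after every fourth entry, i.e. when
         * (i & 3) == 3, cycling through the configured queues.
         */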
5932         reta = 0;
5933         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5934                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5935
5936                 if (j == conf->conf.queue_num)
5937                         j = 0;
5938                 reta = (reta << 8) | conf->conf.queue[j];
5939                 if ((i & 3) == 3)
5940                         IXGBE_WRITE_REG(hw, reta_reg,
5941                                         rte_bswap32(reta));
5942         }
5943
5944         /* Configure the RSS key and the RSS protocols used to compute
5945          * the RSS hash of input packets.
5946          */
5947         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5948                 ixgbe_rss_disable(dev);
5949                 return 0;
5950         }
5951         if (rss_conf.rss_key == NULL)
5952                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5953         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5954
5955         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5956                 return -EINVAL;
5957
5958         return 0;
5959 }
5960
5961 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5962 #if defined(RTE_ARCH_PPC_64)
5963 int
5964 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5965 {
5966         return -1;
5967 }
5968
5969 uint16_t
5970 ixgbe_recv_pkts_vec(
5971         void __rte_unused *rx_queue,
5972         struct rte_mbuf __rte_unused **rx_pkts,
5973         uint16_t __rte_unused nb_pkts)
5974 {
5975         return 0;
5976 }
5977
5978 uint16_t
5979 ixgbe_recv_scattered_pkts_vec(
5980         void __rte_unused *rx_queue,
5981         struct rte_mbuf __rte_unused **rx_pkts,
5982         uint16_t __rte_unused nb_pkts)
5983 {
5984         return 0;
5985 }
5986
5987 int
5988 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5989 {
5990         return -1;
5991 }
5992
5993 uint16_t
5994 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5995                 struct rte_mbuf __rte_unused **tx_pkts,
5996                 uint16_t __rte_unused nb_pkts)
5997 {
5998         return 0;
5999 }
6000
6001 int
6002 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
6003 {
6004         return -1;
6005 }
6006
6007 void
6008 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6009 {
6010         return;
6011 }
6012 #endif