1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST RTE_MBUF_F_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask to indicate which bits are required for building the TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_OUTER_IPV6 |                \
63                 RTE_MBUF_F_TX_OUTER_IPV4 |               \
64                 RTE_MBUF_F_TX_IPV6 |                     \
65                 RTE_MBUF_F_TX_IPV4 |                     \
66                 RTE_MBUF_F_TX_VLAN_PKT |                 \
67                 RTE_MBUF_F_TX_IP_CKSUM |                 \
68                 RTE_MBUF_F_TX_L4_MASK |          \
69                 RTE_MBUF_F_TX_TCP_SEG |          \
70                 RTE_MBUF_F_TX_MACSEC |                   \
71                 RTE_MBUF_F_TX_OUTER_IP_CKSUM |           \
72                 RTE_MBUF_F_TX_SEC_OFFLOAD |      \
73                 IXGBE_TX_IEEE1588_TMST)
74
75 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
76                 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
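/*
 * Note (editor's illustration): any flag in RTE_MBUF_F_TX_OFFLOAD_MASK that is
 * not listed in IXGBE_TX_OFFLOAD_MASK above ends up in
 * IXGBE_TX_OFFLOAD_NOTSUP_MASK and causes ixgbe_prep_pkts() to reject the
 * packet with rte_errno = ENOTSUP (for example, a generic flag such as
 * RTE_MBUF_F_TX_UDP_SEG, assuming it is part of the generic TX offload mask).
 */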
77
78 #if 1
79 #define RTE_PMD_USE_PREFETCH
80 #endif
81
82 #ifdef RTE_PMD_USE_PREFETCH
83 /*
84  * Prefetch a cache line into all cache levels.
85  */
86 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
87 #else
88 #define rte_ixgbe_prefetch(p)   do {} while (0)
89 #endif
90
91 /*********************************************************************
92  *
93  *  TX functions
94  *
95  **********************************************************************/
96
97 /*
98  * Check for descriptors with their DD bit set and free the associated mbufs.
99  * Return the number of descriptors reclaimed, or 0 if none are done.
100  */
101 static __rte_always_inline int
102 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
103 {
104         struct ixgbe_tx_entry *txep;
105         uint32_t status;
106         int i, nb_free = 0;
107         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
108
109         /* check DD bit on threshold descriptor */
110         status = txq->tx_ring[txq->tx_next_dd].wb.status;
111         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
112                 return 0;
113
114         /*
115          * first buffer to free from S/W ring is at index
116          * tx_next_dd - (tx_rs_thresh-1)
117          */
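        /*
         * Worked example (editor's illustration): with tx_rs_thresh = 32 and
         * tx_next_dd = 31, the first entry to free is sw_ring[0], so the
         * mbufs of entries 0..31 are returned to their mempool below.
         */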
118         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
119
120         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
121                 /* free buffers one at a time */
122                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
123                 txep->mbuf = NULL;
124
125                 if (unlikely(m == NULL))
126                         continue;
127
128                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
129                     (nb_free > 0 && m->pool != free[0]->pool)) {
130                         rte_mempool_put_bulk(free[0]->pool,
131                                              (void **)free, nb_free);
132                         nb_free = 0;
133                 }
134
135                 free[nb_free++] = m;
136         }
137
138         if (nb_free > 0)
139                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
140
141         /* buffers were freed, update counters */
142         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
143         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
144         if (txq->tx_next_dd >= txq->nb_tx_desc)
145                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
146
147         return txq->tx_rs_thresh;
148 }
149
150 /* Populate 4 descriptors with data from 4 mbufs */
151 static inline void
152 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
153 {
154         uint64_t buf_dma_addr;
155         uint32_t pkt_len;
156         int i;
157
158         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
159                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
160                 pkt_len = (*pkts)->data_len;
161
162                 /* write data to descriptor */
163                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
164
165                 txdp->read.cmd_type_len =
166                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
167
168                 txdp->read.olinfo_status =
169                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
170
171                 rte_prefetch0(&(*pkts)->pool);
172         }
173 }
174
175 /* Populate 1 descriptor with data from 1 mbuf */
176 static inline void
177 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
178 {
179         uint64_t buf_dma_addr;
180         uint32_t pkt_len;
181
182         buf_dma_addr = rte_mbuf_data_iova(*pkts);
183         pkt_len = (*pkts)->data_len;
184
185         /* write data to descriptor */
186         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187         txdp->read.cmd_type_len =
188                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
189         txdp->read.olinfo_status =
190                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
191         rte_prefetch0(&(*pkts)->pool);
192 }
193
194 /*
195  * Fill H/W descriptor ring with mbuf data.
196  * Copy mbuf pointers to the S/W ring.
197  */
198 static inline void
199 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
200                       uint16_t nb_pkts)
201 {
202         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
203         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
204         const int N_PER_LOOP = 4;
205         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
206         int mainpart, leftover;
207         int i, j;
208
209         /*
210          * Process most of the packets in chunks of N pkts.  Any
211          * leftover packets will get processed one at a time.
212          */
213         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
214         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
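        /*
         * Example (editor's illustration): with nb_pkts = 13 and N_PER_LOOP = 4,
         * mainpart = 12 and leftover = 1, so the loop below makes three tx4()
         * calls and then a single tx1() call.
         */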
215         for (i = 0; i < mainpart; i += N_PER_LOOP) {
216                 /* Copy N mbuf pointers to the S/W ring */
217                 for (j = 0; j < N_PER_LOOP; ++j) {
218                         (txep + i + j)->mbuf = *(pkts + i + j);
219                 }
220                 tx4(txdp + i, pkts + i);
221         }
222
223         if (unlikely(leftover > 0)) {
224                 for (i = 0; i < leftover; ++i) {
225                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
226                         tx1(txdp + mainpart + i, pkts + mainpart + i);
227                 }
228         }
229 }
230
231 static inline uint16_t
232 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
233              uint16_t nb_pkts)
234 {
235         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
236         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
237         uint16_t n = 0;
238
239         /*
240          * Begin scanning the H/W ring for done descriptors when the
241          * number of available descriptors drops below tx_free_thresh.  For
242          * each done descriptor, free the associated buffer.
243          */
244         if (txq->nb_tx_free < txq->tx_free_thresh)
245                 ixgbe_tx_free_bufs(txq);
246
247         /* Only use descriptors that are available */
248         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
249         if (unlikely(nb_pkts == 0))
250                 return 0;
251
252         /* Use exactly nb_pkts descriptors */
253         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
254
255         /*
256          * At this point, we know there are enough descriptors in the
257          * ring to transmit all the packets.  This assumes that each
258          * mbuf contains a single segment, and that no new offloads
259          * are expected, which would require a new context descriptor.
260          */
261
262         /*
263          * See if we're going to wrap around. If so, handle the top
264          * of the descriptor ring first, then do the bottom.  If not,
265          * the processing looks just like the "bottom" part anyway...
266          */
267         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
268                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
269                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
270
271                 /*
272                  * We know that the last descriptor in the ring will need to
273                  * have its RS bit set because tx_rs_thresh has to be
274                  * a divisor of the ring size
275                  */
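                /*
                 * Example (editor's illustration): with nb_tx_desc = 512 and
                 * tx_rs_thresh = 32, tx_next_rs always lands on an index of
                 * the form 32 * k - 1, so its last value before wrapping is
                 * 511, i.e. the final descriptor in the ring.
                 */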
276                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
277                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
278                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
279
280                 txq->tx_tail = 0;
281         }
282
283         /* Fill H/W descriptor ring with mbuf data */
284         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
285         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
286
287         /*
288          * Determine if RS bit should be set
289          * This is what we actually want:
290          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
291          * but instead of subtracting 1 and doing >=, we can just do
292          * greater than without subtracting.
293          */
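        /*
         * Worked example (editor's illustration, assuming tx_rs_thresh = 32):
         * tx_next_rs starts at 31; once tx_tail has advanced to 32 or more,
         * descriptor 31 gets its RS bit set and tx_next_rs moves on to 63.
         */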
294         if (txq->tx_tail > txq->tx_next_rs) {
295                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
296                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
297                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
298                                                 txq->tx_rs_thresh);
299                 if (txq->tx_next_rs >= txq->nb_tx_desc)
300                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
301         }
302
303         /*
304          * Check for wrap-around. This would only happen if we used
305          * up to the last descriptor in the ring, no more, no less.
306          */
307         if (txq->tx_tail >= txq->nb_tx_desc)
308                 txq->tx_tail = 0;
309
310         /* update tail pointer */
311         rte_wmb();
312         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
313
314         return nb_pkts;
315 }
316
317 uint16_t
318 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
319                        uint16_t nb_pkts)
320 {
321         uint16_t nb_tx;
322
323         /* If the burst fits within TX_MAX_BURST pkts, transmit it in a single call */
324         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
325                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
326
327         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
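        /*
         * Example (editor's illustration, assuming a burst limit of 32):
         * a request for 80 packets is sent in chunks of 32, 32 and 16, and
         * the loop stops early if a chunk is only partially transmitted.
         */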
328         nb_tx = 0;
329         while (nb_pkts) {
330                 uint16_t ret, n;
331
332                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
333                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
334                 nb_tx = (uint16_t)(nb_tx + ret);
335                 nb_pkts = (uint16_t)(nb_pkts - ret);
336                 if (ret < n)
337                         break;
338         }
339
340         return nb_tx;
341 }
342
343 static uint16_t
344 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
345                     uint16_t nb_pkts)
346 {
347         uint16_t nb_tx = 0;
348         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
349
350         while (nb_pkts) {
351                 uint16_t ret, num;
352
353                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
354                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
355                                                  num);
356                 nb_tx += ret;
357                 nb_pkts -= ret;
358                 if (ret < num)
359                         break;
360         }
361
362         return nb_tx;
363 }
364
365 static inline void
366 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
367                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
368                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
369                 __rte_unused uint64_t *mdata)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & RTE_MBUF_F_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation is required for this packet */
391         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
416                 case RTE_MBUF_F_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
420                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
421                         tx_offload_mask.l2_len |= ~0;
422                         tx_offload_mask.l3_len |= ~0;
423                         break;
424                 case RTE_MBUF_F_TX_TCP_CKSUM:
425                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
426                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
427                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
428                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
429                         tx_offload_mask.l2_len |= ~0;
430                         tx_offload_mask.l3_len |= ~0;
431                         break;
432                 case RTE_MBUF_F_TX_SCTP_CKSUM:
433                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
434                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
435                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
436                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
437                         tx_offload_mask.l2_len |= ~0;
438                         tx_offload_mask.l3_len |= ~0;
439                         break;
440                 default:
441                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
442                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
443                         break;
444                 }
445         }
446
447         if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) {
448                 tx_offload_mask.outer_l2_len |= ~0;
449                 tx_offload_mask.outer_l3_len |= ~0;
450                 tx_offload_mask.l2_len |= ~0;
451                 seqnum_seed |= tx_offload.outer_l3_len
452                                << IXGBE_ADVTXD_OUTER_IPLEN;
453                 seqnum_seed |= tx_offload.l2_len
454                                << IXGBE_ADVTXD_TUNNEL_LEN;
455         }
456 #ifdef RTE_LIB_SECURITY
457         if (ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
458                 union ixgbe_crypto_tx_desc_md *md =
459                                 (union ixgbe_crypto_tx_desc_md *)mdata;
460                 seqnum_seed |=
461                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
462                 type_tucmd_mlhl |= md->enc ?
463                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
464                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
465                 type_tucmd_mlhl |=
466                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
467                 tx_offload_mask.sa_idx |= ~0;
468                 tx_offload_mask.sec_pad_len |= ~0;
469         }
470 #endif
471
472         txq->ctx_cache[ctx_idx].flags = ol_flags;
473         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
474                 tx_offload_mask.data[0] & tx_offload.data[0];
475         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
476                 tx_offload_mask.data[1] & tx_offload.data[1];
477         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
478
479         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
480         vlan_macip_lens = tx_offload.l3_len;
481         if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
482                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
483                                     IXGBE_ADVTXD_MACLEN_SHIFT);
484         else
485                 vlan_macip_lens |= (tx_offload.l2_len <<
486                                     IXGBE_ADVTXD_MACLEN_SHIFT);
487         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
488         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
489         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
490         ctx_txd->seqnum_seed     = seqnum_seed;
491 }
492
493 /*
494  * Check which hardware context can be used. Use the existing match
495  * or create a new context descriptor.
496  */
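/*
 * Usage sketch (editor's illustration, mirroring the caller in
 * ixgbe_xmit_pkts()): the queue caches two contexts and txq->ctx_curr toggles
 * between them, so the typical calling pattern is:
 *
 *   ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
 *   new_ctx = (ctx == IXGBE_CTX_NUM);  // no cached slot matched
 *   ctx = txq->ctx_curr;               // slot to build into or reuse
 */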
497 static inline uint32_t
498 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
499                    union ixgbe_tx_offload tx_offload)
500 {
501         /* Check for a match with the currently used context */
502         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
503                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
504                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
505                      & tx_offload.data[0])) &&
506                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
507                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
508                      & tx_offload.data[1]))))
509                 return txq->ctx_curr;
510
511         /* Check for a match with the other cached context */
512         txq->ctx_curr ^= 1;
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* Mismatch: a new context descriptor must be built */
523         return IXGBE_CTX_NUM;
524 }
525
526 static inline uint32_t
527 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
528 {
529         uint32_t tmp = 0;
530
531         if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) != RTE_MBUF_F_TX_L4_NO_CKSUM)
532                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
533         if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
534                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
535         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
536                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
537         return tmp;
538 }
539
540 static inline uint32_t
541 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
542 {
543         uint32_t cmdtype = 0;
544
545         if (ol_flags & RTE_MBUF_F_TX_VLAN_PKT)
546                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
547         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
548                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
549         if (ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM)
550                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
551         if (ol_flags & RTE_MBUF_F_TX_MACSEC)
552                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
553         return cmdtype;
554 }
555
556 /* Default RS bit threshold values */
557 #ifndef DEFAULT_TX_RS_THRESH
558 #define DEFAULT_TX_RS_THRESH   32
559 #endif
560 #ifndef DEFAULT_TX_FREE_THRESH
561 #define DEFAULT_TX_FREE_THRESH 32
562 #endif
563
564 /* Reset transmit descriptors after they have been used */
565 static inline int
566 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
567 {
568         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
569         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
570         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
571         uint16_t nb_tx_desc = txq->nb_tx_desc;
572         uint16_t desc_to_clean_to;
573         uint16_t nb_tx_to_clean;
574         uint32_t status;
575
576         /* Determine the last descriptor needing to be cleaned */
577         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
578         if (desc_to_clean_to >= nb_tx_desc)
579                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
580
581         /* Check to make sure the last descriptor to clean is done */
582         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
583         status = txr[desc_to_clean_to].wb.status;
584         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
585                 PMD_TX_LOG(DEBUG,
586                            "TX descriptor %4u is not done "
587                            "(port=%d queue=%d)",
588                            desc_to_clean_to,
589                            txq->port_id, txq->queue_id);
590                 /* Failed to clean any descriptors, better luck next time */
591                 return -(1);
592         }
593
594         /* Figure out how many descriptors will be cleaned */
595         if (last_desc_cleaned > desc_to_clean_to)
596                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
597                                                         desc_to_clean_to);
598         else
599                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
600                                                 last_desc_cleaned);
601
602         PMD_TX_LOG(DEBUG,
603                    "Cleaning %4u TX descriptors: %4u to %4u "
604                    "(port=%d queue=%d)",
605                    nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
606                    txq->port_id, txq->queue_id);
607
608         /*
609          * The last descriptor to clean is done, so that means all the
610          * descriptors from the last descriptor that was cleaned
611          * up to the last descriptor with the RS bit set
612          * are done. Only reset the threshold descriptor.
613          */
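        /*
         * Worked example (editor's illustration, assuming one descriptor per
         * packet): with last_desc_cleaned = 31 and tx_rs_thresh = 32,
         * desc_to_clean_to = 63; once descriptor 63 reports DD,
         * nb_tx_to_clean = 32 and nb_tx_free grows by 32 below.
         */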
614         txr[desc_to_clean_to].wb.status = 0;
615
616         /* Update the txq to reflect the last descriptor that was cleaned */
617         txq->last_desc_cleaned = desc_to_clean_to;
618         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
619
620         /* No Error */
621         return 0;
622 }
623
624 uint16_t
625 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
626                 uint16_t nb_pkts)
627 {
628         struct ixgbe_tx_queue *txq;
629         struct ixgbe_tx_entry *sw_ring;
630         struct ixgbe_tx_entry *txe, *txn;
631         volatile union ixgbe_adv_tx_desc *txr;
632         volatile union ixgbe_adv_tx_desc *txd, *txp;
633         struct rte_mbuf     *tx_pkt;
634         struct rte_mbuf     *m_seg;
635         uint64_t buf_dma_addr;
636         uint32_t olinfo_status;
637         uint32_t cmd_type_len;
638         uint32_t pkt_len;
639         uint16_t slen;
640         uint64_t ol_flags;
641         uint16_t tx_id;
642         uint16_t tx_last;
643         uint16_t nb_tx;
644         uint16_t nb_used;
645         uint64_t tx_ol_req;
646         uint32_t ctx = 0;
647         uint32_t new_ctx;
648         union ixgbe_tx_offload tx_offload;
649 #ifdef RTE_LIB_SECURITY
650         uint8_t use_ipsec;
651 #endif
652
653         tx_offload.data[0] = 0;
654         tx_offload.data[1] = 0;
655         txq = tx_queue;
656         sw_ring = txq->sw_ring;
657         txr     = txq->tx_ring;
658         tx_id   = txq->tx_tail;
659         txe = &sw_ring[tx_id];
660         txp = NULL;
661
662         /* Determine if the descriptor ring needs to be cleaned. */
663         if (txq->nb_tx_free < txq->tx_free_thresh)
664                 ixgbe_xmit_cleanup(txq);
665
666         rte_prefetch0(&txe->mbuf->pool);
667
668         /* TX loop */
669         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
670                 new_ctx = 0;
671                 tx_pkt = *tx_pkts++;
672                 pkt_len = tx_pkt->pkt_len;
673
674                 /*
675                  * Determine how many (if any) context descriptors
676                  * are needed for offload functionality.
677                  */
678                 ol_flags = tx_pkt->ol_flags;
679 #ifdef RTE_LIB_SECURITY
680                 use_ipsec = txq->using_ipsec && (ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
681 #endif
682
683                 /* If hardware offload required */
684                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
685                 if (tx_ol_req) {
686                         tx_offload.l2_len = tx_pkt->l2_len;
687                         tx_offload.l3_len = tx_pkt->l3_len;
688                         tx_offload.l4_len = tx_pkt->l4_len;
689                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
690                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
691                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
692                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
693 #ifdef RTE_LIB_SECURITY
694                         if (use_ipsec) {
695                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
696                                         (union ixgbe_crypto_tx_desc_md *)
697                                                 rte_security_dynfield(tx_pkt);
698                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
699                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
700                         }
701 #endif
702
703                         /* Check whether a new context must be built or the existing one can be reused. */
704                         ctx = what_advctx_update(txq, tx_ol_req,
705                                 tx_offload);
706                         /* Only allocate a context descriptor if required */
707                         new_ctx = (ctx == IXGBE_CTX_NUM);
708                         ctx = txq->ctx_curr;
709                 }
710
711                 /*
712                  * Keep track of how many descriptors are used in this loop.
713                  * This will always be the number of segments plus the number of
714                  * context descriptors required to transmit the packet.
715                  */
716                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
717
718                 if (txp != NULL &&
719                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
720                         /* set RS on the previous packet in the burst */
721                         txp->read.cmd_type_len |=
722                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
723
724                 /*
725                  * The number of descriptors that must be allocated for a
726                  * packet is the number of segments of that packet, plus 1
727                  * Context Descriptor for the hardware offload, if any.
728                  * Determine the last TX descriptor to allocate in the TX ring
729                  * for the packet, starting from the current position (tx_id)
730                  * in the ring.
731                  */
732                 tx_last = (uint16_t) (tx_id + nb_used - 1);
733
734                 /* Circular ring */
735                 if (tx_last >= txq->nb_tx_desc)
736                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
737
738                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
739                            " tx_first=%u tx_last=%u",
740                            (unsigned) txq->port_id,
741                            (unsigned) txq->queue_id,
742                            (unsigned) pkt_len,
743                            (unsigned) tx_id,
744                            (unsigned) tx_last);
745
746                 /*
747                  * Make sure there are enough TX descriptors available to
748                  * transmit the entire packet.
749                  * nb_used better be less than or equal to txq->tx_rs_thresh
750                  */
751                 if (nb_used > txq->nb_tx_free) {
752                         PMD_TX_LOG(DEBUG,
753                                    "Not enough free TX descriptors "
754                                    "nb_used=%4u nb_free=%4u "
755                                    "(port=%d queue=%d)",
756                                    nb_used, txq->nb_tx_free,
757                                    txq->port_id, txq->queue_id);
758
759                         if (ixgbe_xmit_cleanup(txq) != 0) {
760                                 /* Could not clean any descriptors */
761                                 if (nb_tx == 0)
762                                         return 0;
763                                 goto end_of_tx;
764                         }
765
766                         /* nb_used better be <= txq->tx_rs_thresh */
767                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
768                                 PMD_TX_LOG(DEBUG,
769                                            "The number of descriptors needed to "
770                                            "transmit the packet exceeds the "
771                                            "RS bit threshold. This will impact "
772                                            "performance. "
773                                            "nb_used=%4u nb_free=%4u "
774                                            "tx_rs_thresh=%4u. "
775                                            "(port=%d queue=%d)",
776                                            nb_used, txq->nb_tx_free,
777                                            txq->tx_rs_thresh,
778                                            txq->port_id, txq->queue_id);
779                                 /*
780                                  * Loop here until there are enough TX
781                                  * descriptors or until the ring cannot be
782                                  * cleaned.
783                                  */
784                                 while (nb_used > txq->nb_tx_free) {
785                                         if (ixgbe_xmit_cleanup(txq) != 0) {
786                                                 /*
787                                                  * Could not clean any
788                                                  * descriptors
789                                                  */
790                                                 if (nb_tx == 0)
791                                                         return 0;
792                                                 goto end_of_tx;
793                                         }
794                                 }
795                         }
796                 }
797
798                 /*
799                  * By now there are enough free TX descriptors to transmit
800                  * the packet.
801                  */
802
803                 /*
804                  * Set common flags of all TX Data Descriptors.
805                  *
806                  * The following bits must be set in all Data Descriptors:
807                  *   - IXGBE_ADVTXD_DTYP_DATA
808                  *   - IXGBE_ADVTXD_DCMD_DEXT
809                  *
810                  * The following bits must be set in the first Data Descriptor
811                  * and are ignored in the other ones:
812                  *   - IXGBE_ADVTXD_DCMD_IFCS
813                  *   - IXGBE_ADVTXD_MAC_1588
814                  *   - IXGBE_ADVTXD_DCMD_VLE
815                  *
816                  * The following bits must only be set in the last Data
817                  * Descriptor:
818                  *   - IXGBE_TXD_CMD_EOP
819                  *
820                  * The following bits can be set in any Data Descriptor, but
821                  * are only set in the last Data Descriptor:
822                  *   - IXGBE_TXD_CMD_RS
823                  */
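                /*
                 * Example (editor's illustration): for a two-segment,
                 * VLAN-tagged packet, DTYP_DATA and DCMD_DEXT are set on both
                 * data descriptors, IFCS and VLE only matter on the first one,
                 * and EOP (plus RS, once the RS threshold is reached) is set
                 * only on the second one.
                 */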
824                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
825                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
826
827 #ifdef RTE_LIBRTE_IEEE1588
828                 if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
829                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
830 #endif
831
832                 olinfo_status = 0;
833                 if (tx_ol_req) {
834
835                         if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
836                                 /* when TSO is on, the paylen in the descriptor
837                                  * is not the packet len but the TCP payload len */
838                                 pkt_len -= (tx_offload.l2_len +
839                                         tx_offload.l3_len + tx_offload.l4_len);
840                         }
841
842                         /*
843                          * Set up the TX Advanced Context Descriptor if required
844                          */
845                         if (new_ctx) {
846                                 volatile struct ixgbe_adv_tx_context_desc *
847                                     ctx_txd;
848
849                                 ctx_txd = (volatile struct
850                                     ixgbe_adv_tx_context_desc *)
851                                     &txr[tx_id];
852
853                                 txn = &sw_ring[txe->next_id];
854                                 rte_prefetch0(&txn->mbuf->pool);
855
856                                 if (txe->mbuf != NULL) {
857                                         rte_pktmbuf_free_seg(txe->mbuf);
858                                         txe->mbuf = NULL;
859                                 }
860
861                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
862                                         tx_offload,
863                                         rte_security_dynfield(tx_pkt));
864
865                                 txe->last_id = tx_last;
866                                 tx_id = txe->next_id;
867                                 txe = txn;
868                         }
869
870                         /*
871                          * Set up the TX Advanced Data Descriptor.
872                          * This path is taken whether a new context
873                          * descriptor was built or an existing one is reused.
874                          */
875                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
876                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
877                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
878                 }
879
880                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
881 #ifdef RTE_LIB_SECURITY
882                 if (use_ipsec)
883                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
884 #endif
885
886                 m_seg = tx_pkt;
887                 do {
888                         txd = &txr[tx_id];
889                         txn = &sw_ring[txe->next_id];
890                         rte_prefetch0(&txn->mbuf->pool);
891
892                         if (txe->mbuf != NULL)
893                                 rte_pktmbuf_free_seg(txe->mbuf);
894                         txe->mbuf = m_seg;
895
896                         /*
897                          * Set up Transmit Data Descriptor.
898                          */
899                         slen = m_seg->data_len;
900                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
901                         txd->read.buffer_addr =
902                                 rte_cpu_to_le_64(buf_dma_addr);
903                         txd->read.cmd_type_len =
904                                 rte_cpu_to_le_32(cmd_type_len | slen);
905                         txd->read.olinfo_status =
906                                 rte_cpu_to_le_32(olinfo_status);
907                         txe->last_id = tx_last;
908                         tx_id = txe->next_id;
909                         txe = txn;
910                         m_seg = m_seg->next;
911                 } while (m_seg != NULL);
912
913                 /*
914                  * The last packet data descriptor needs End Of Packet (EOP)
915                  */
916                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
917                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
918                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
919
920                 /* Set RS bit only on threshold packets' last descriptor */
921                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
922                         PMD_TX_LOG(DEBUG,
923                                    "Setting RS bit on TXD id="
924                                    "%4u (port=%d queue=%d)",
925                                    tx_last, txq->port_id, txq->queue_id);
926
927                         cmd_type_len |= IXGBE_TXD_CMD_RS;
928
929                         /* Update txq RS bit counters */
930                         txq->nb_tx_used = 0;
931                         txp = NULL;
932                 } else
933                         txp = txd;
934
935                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
936         }
937
938 end_of_tx:
939         /* set RS on last packet in the burst */
940         if (txp != NULL)
941                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
942
943         rte_wmb();
944
945         /*
946          * Set the Transmit Descriptor Tail (TDT)
947          */
948         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
949                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
950                    (unsigned) tx_id, (unsigned) nb_tx);
951         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
952         txq->tx_tail = tx_id;
953
954         return nb_tx;
955 }
956
957 /*********************************************************************
958  *
959  *  TX prep functions
960  *
961  **********************************************************************/
962 uint16_t
963 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
964 {
965         int i, ret;
966         uint64_t ol_flags;
967         struct rte_mbuf *m;
968         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
969
970         for (i = 0; i < nb_pkts; i++) {
971                 m = tx_pkts[i];
972                 ol_flags = m->ol_flags;
973
974                 /**
975                  * Check if the packet meets the requirements on the number of segments
976                  *
977                  * NOTE: for ixgbe the limit is always (40 - WTHRESH), for both TSO
978                  *       and non-TSO
979                  */
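                /*
                 * Example (editor's illustration, assuming WTHRESH = 0):
                 * packets with up to IXGBE_TX_MAX_SEG (40, per the note above)
                 * segments are accepted; longer chains are rejected below with
                 * rte_errno = EINVAL.
                 */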
980
981                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
982                         rte_errno = EINVAL;
983                         return i;
984                 }
985
986                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
987                         rte_errno = ENOTSUP;
988                         return i;
989                 }
990
991                 /* check the size of the packet */
992                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
993                         rte_errno = EINVAL;
994                         return i;
995                 }
996
997 #ifdef RTE_ETHDEV_DEBUG_TX
998                 ret = rte_validate_tx_offload(m);
999                 if (ret != 0) {
1000                         rte_errno = -ret;
1001                         return i;
1002                 }
1003 #endif
1004                 ret = rte_net_intel_cksum_prepare(m);
1005                 if (ret != 0) {
1006                         rte_errno = -ret;
1007                         return i;
1008                 }
1009         }
1010
1011         return i;
1012 }
1013
1014 /*********************************************************************
1015  *
1016  *  RX functions
1017  *
1018  **********************************************************************/
1019
1020 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1021 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1022 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1023 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1024 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1029 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1030 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1031 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1032 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1053
1054 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1077
1078 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1101
1102 /**
1103  * Use two different tables for normal packets and tunnel packets
1104  * to save space.
1105  */
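/*
 * Example (editor's illustration): an RX descriptor whose packet-type field
 * indexes entry 0x11 (IXGBE_PACKET_TYPE_IPV4_TCP) resolves to
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP in the table below.
 */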
1106 const uint32_t
1107         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1108         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1109         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4,
1111         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1113         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1115         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1117         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV4_EXT,
1119         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1121         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1123         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1125         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6,
1127         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1129         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1131         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1133         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV6_EXT,
1135         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1137         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1138                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1139         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1141         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1142                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1143                 RTE_PTYPE_INNER_L3_IPV6,
1144         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1146                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1147         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1148                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1149         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1150         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1152                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1153         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1154                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1155                 RTE_PTYPE_INNER_L3_IPV6,
1156         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1158                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1159         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1160                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1161                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1162         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1164                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1165         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1166                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1167                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1168         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1169                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1170                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1171         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1172                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1173                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1174         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1175                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1176                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1177         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1178                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1179                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1180         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1181                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1183         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1184                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1185                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1186         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1187                 RTE_PTYPE_L2_ETHER |
1188                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1189                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1190 };
1191
1192 const uint32_t
1193         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1194         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1195                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1196                 RTE_PTYPE_INNER_L2_ETHER,
1197         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1198                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1199                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1200         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1201                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1203         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1204                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1205                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1206         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1207                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1209         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1210                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1211                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1212         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1213                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1214                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1215         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1216                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1217                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1218                 RTE_PTYPE_INNER_L4_TCP,
1219         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1220                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1222                 RTE_PTYPE_INNER_L4_TCP,
1223         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1224                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1226         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1227                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1228                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1229                 RTE_PTYPE_INNER_L4_TCP,
1230         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1231                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1232                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1233                 RTE_PTYPE_INNER_L3_IPV4,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1237                 RTE_PTYPE_INNER_L4_UDP,
1238         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1239                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1240                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1241                 RTE_PTYPE_INNER_L4_UDP,
1242         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1243                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1244                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1245                 RTE_PTYPE_INNER_L4_SCTP,
1246         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1247                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1249         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1250                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1251                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1252                 RTE_PTYPE_INNER_L4_UDP,
1253         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1254                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1255                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1256                 RTE_PTYPE_INNER_L4_SCTP,
1257         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1258                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1259                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1260                 RTE_PTYPE_INNER_L3_IPV4,
1261         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1262                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1263                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1264                 RTE_PTYPE_INNER_L4_SCTP,
1265         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1266                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1267                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1268                 RTE_PTYPE_INNER_L4_SCTP,
1269         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1270                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1271                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1272                 RTE_PTYPE_INNER_L4_TCP,
1273         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1274                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1275                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1276                 RTE_PTYPE_INNER_L4_UDP,
1277
1278         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1279                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1281         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1282                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                 RTE_PTYPE_INNER_L3_IPV4,
1285         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1286                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1287                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1288                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1289         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1290                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                 RTE_PTYPE_INNER_L3_IPV6,
1293         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1294                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                 RTE_PTYPE_INNER_L3_IPV4,
1297         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1298                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1301         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1302                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                 RTE_PTYPE_INNER_L3_IPV4,
1305         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1306                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1309         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1310                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1313         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1314                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1315                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1316                 RTE_PTYPE_INNER_L3_IPV4,
1317         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1318                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1321         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1322                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1323                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1324                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1325         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1326                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1329         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1330                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1333         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1334                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1335                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1336                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1337         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1338                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1339                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1340                 RTE_PTYPE_INNER_L3_IPV4,
1341         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1342                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1343                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1344                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1345         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1346                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1347                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1348                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1349         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1350                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1351                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1352                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1353         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1354                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1355                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1356                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1357         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1358                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1359                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1360                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1361         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1362                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1363                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1364                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1365         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1366                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1367                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1368                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1369 };
1370
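/*
 * Comparison callback for the power-management monitor condition set up in
 * ixgbe_get_monitor_addr() below: return -1 when the DD bit of the watched
 * status word is already set (the descriptor has been written back, so there
 * is no point in sleeping), 0 otherwise.
 */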
1371 static int
1372 ixgbe_monitor_callback(const uint64_t value,
1373                 const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
1374 {
1375         const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1376         /*
1377          * we expect the DD bit to be set to 1 if this descriptor was already
1378          * written to.
1379          */
1380         return (value & m) == m ? -1 : 0;
1381 }
1382
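/*
 * Fill in the rte_power_monitor condition for an Rx queue: watch the 32-bit
 * status word of the next descriptor expected to be written back (rx_tail)
 * and use ixgbe_monitor_callback() to decide whether to keep waiting.
 */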
1383 int
1384 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1385 {
1386         volatile union ixgbe_adv_rx_desc *rxdp;
1387         struct ixgbe_rx_queue *rxq = rx_queue;
1388         uint16_t desc;
1389
1390         desc = rxq->rx_tail;
1391         rxdp = &rxq->rx_ring[desc];
1392         /* watch for changes in status bit */
1393         pmc->addr = &rxdp->wb.upper.status_error;
1394
1395         /* comparison callback */
1396         pmc->fn = ixgbe_monitor_callback;
1397
1398         /* the registers are 32-bit */
1399         pmc->size = sizeof(uint32_t);
1400
1401         return 0;
1402 }
1403
1404 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1405 static inline uint32_t
1406 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1407 {
1408
1409         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1410                 return RTE_PTYPE_UNKNOWN;
1411
1412         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1413
1414         /* For tunnel packets */
1415         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1416                 /* Remove the tunnel bit to save space. */
1417                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1418                 return ptype_table_tn[pkt_info];
1419         }
1420
1421         /**
1422          * For x550, if the packet is not tunneled,
1423          * the tunnel type bit should be set to 0.
1424          * Reuse 82599's mask.
1425          */
1426         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1427
1428         return ptype_table[pkt_info];
1429 }
1430
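/*
 * Translate the RSS-type / FDIR bits of the descriptor's pkt_info field into
 * mbuf offload flags (RTE_MBUF_F_RX_RSS_HASH / RTE_MBUF_F_RX_FDIR, plus the
 * IEEE1588 PTP flag when that support is compiled in).
 */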
1431 static inline uint64_t
1432 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1433 {
1434         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1435                 0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
1436                 0, RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH,
1437                 RTE_MBUF_F_RX_RSS_HASH, 0, 0, 0,
1438                 0, 0, 0,  RTE_MBUF_F_RX_FDIR,
1439         };
1440 #ifdef RTE_LIBRTE_IEEE1588
1441         static uint64_t ip_pkt_etqf_map[8] = {
1442                 0, 0, 0, RTE_MBUF_F_RX_IEEE1588_PTP,
1443                 0, 0, 0, 0,
1444         };
1445
1446         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1447                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1448                                 ip_rss_types_map[pkt_info & 0XF];
1449         else
1450                 return ip_rss_types_map[pkt_info & 0XF];
1451 #else
1452         return ip_rss_types_map[pkt_info & 0XF];
1453 #endif
1454 }
1455
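/*
 * Translate the descriptor status bits into mbuf offload flags: the
 * VLAN-present indication and, when IEEE1588 support is compiled in, the
 * Rx timestamp flag.
 */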
1456 static inline uint64_t
1457 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1458 {
1459         uint64_t pkt_flags;
1460
1461         /*
1462          * Check only whether a VLAN is present.
1463          * Do not check whether the L3/L4 Rx checksum was computed by the NIC;
1464          * that can be determined from the rte_eth_rxmode.offloads flag.
1465          */
1466         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1467
1468 #ifdef RTE_LIBRTE_IEEE1588
1469         if (rx_status & IXGBE_RXD_STAT_TMST)
1470                 pkt_flags = pkt_flags | RTE_MBUF_F_RX_IEEE1588_TMST;
1471 #endif
1472         return pkt_flags;
1473 }
1474
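/*
 * Translate the descriptor error/status bits into mbuf offload flags:
 * L3/L4 checksum good/bad, outer-IP checksum error and, when RTE_LIB_SECURITY
 * is enabled, the inline security offload status.
 */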
1475 static inline uint64_t
1476 rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1477                            uint8_t rx_udp_csum_zero_err)
1478 {
1479         uint64_t pkt_flags;
1480
1481         /*
1482          * Bit 31: IPE, IPv4 checksum error
1483          * Bit 30: L4I, L4 integrity error
1484          */
1485         static uint64_t error_to_pkt_flags_map[4] = {
1486                 RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
1487                 RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
1488                 RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
1489                 RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD
1490         };
1491         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1492                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1493
1494         /* Mask out the bad UDP checksum error if the hardware has the UDP
1495          * zero-checksum error issue, so that the software application will
1496          * then have to recompute the checksum itself if needed.
1497          */
1498         if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1499             (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1500             rx_udp_csum_zero_err)
1501                 pkt_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_BAD;
1502
1503         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1504             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1505                 pkt_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
1506         }
1507
1508 #ifdef RTE_LIB_SECURITY
1509         if (rx_status & IXGBE_RXD_STAT_SECP) {
1510                 pkt_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD;
1511                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1512                         pkt_flags |= RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED;
1513         }
1514 #endif
1515
1516         return pkt_flags;
1517 }
1518
1519 /*
1520  * LOOK_AHEAD defines how many desc statuses to check beyond the
1521  * current descriptor.
1522  * It must be a pound define for optimal performance.
1523  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1524  * function only works with LOOK_AHEAD=8.
1525  */
1526 #define LOOK_AHEAD 8
1527 #if (LOOK_AHEAD != 8)
1528 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1529 #endif
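/*
 * Scan the HW ring in groups of LOOK_AHEAD descriptors, convert every
 * completed descriptor into its mbuf (length, VLAN, offload flags, packet
 * type, RSS/FDIR hash) and stage the mbuf pointers in rxq->rx_stage.
 * Returns the number of descriptors that were completed.
 */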
1530 static inline int
1531 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1532 {
1533         volatile union ixgbe_adv_rx_desc *rxdp;
1534         struct ixgbe_rx_entry *rxep;
1535         struct rte_mbuf *mb;
1536         uint16_t pkt_len;
1537         uint64_t pkt_flags;
1538         int nb_dd;
1539         uint32_t s[LOOK_AHEAD];
1540         uint32_t pkt_info[LOOK_AHEAD];
1541         int i, j, nb_rx = 0;
1542         uint32_t status;
1543         uint64_t vlan_flags = rxq->vlan_flags;
1544
1545         /* get references to current descriptor and S/W ring entry */
1546         rxdp = &rxq->rx_ring[rxq->rx_tail];
1547         rxep = &rxq->sw_ring[rxq->rx_tail];
1548
1549         status = rxdp->wb.upper.status_error;
1550         /* check to make sure there is at least 1 packet to receive */
1551         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1552                 return 0;
1553
1554         /*
1555          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1556          * reference packets that are ready to be received.
1557          */
1558         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1559              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1560                 /* Read desc statuses; ordering vs. the other descriptor fields is ensured by rte_smp_rmb() below */
1561                 for (j = 0; j < LOOK_AHEAD; j++)
1562                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1563
1564                 rte_smp_rmb();
1565
1566                 /* Compute how many status bits were set */
1567                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1568                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1569                         ;
1570
1571                 for (j = 0; j < nb_dd; j++)
1572                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1573                                                        lo_dword.data);
1574
1575                 nb_rx += nb_dd;
1576
1577                 /* Translate descriptor info to mbuf format */
1578                 for (j = 0; j < nb_dd; ++j) {
1579                         mb = rxep[j].mbuf;
1580                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1581                                   rxq->crc_len;
1582                         mb->data_len = pkt_len;
1583                         mb->pkt_len = pkt_len;
1584                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1585
1586                         /* convert descriptor fields to rte mbuf flags */
1587                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1588                                 vlan_flags);
1589                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
1590                                         (uint16_t)pkt_info[j],
1591                                         rxq->rx_udp_csum_zero_err);
1592                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1593                                         ((uint16_t)pkt_info[j]);
1594                         mb->ol_flags = pkt_flags;
1595                         mb->packet_type =
1596                                 ixgbe_rxd_pkt_info_to_pkt_type
1597                                         (pkt_info[j], rxq->pkt_type_mask);
1598
1599                         if (likely(pkt_flags & RTE_MBUF_F_RX_RSS_HASH))
1600                                 mb->hash.rss = rte_le_to_cpu_32(
1601                                     rxdp[j].wb.lower.hi_dword.rss);
1602                         else if (pkt_flags & RTE_MBUF_F_RX_FDIR) {
1603                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1604                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1605                                     IXGBE_ATR_HASH_MASK;
1606                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1607                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1608                         }
1609                 }
1610
1611                 /* Move mbuf pointers from the S/W ring to the stage */
1612                 for (j = 0; j < LOOK_AHEAD; ++j) {
1613                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1614                 }
1615
1616                 /* stop if not all LOOK_AHEAD descriptors in this group were done */
1617                 if (nb_dd != LOOK_AHEAD)
1618                         break;
1619         }
1620
1621         /* clear software ring entries so we can cleanup correctly */
1622         for (i = 0; i < nb_rx; ++i) {
1623                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1624         }
1625
1626
1627         return nb_rx;
1628 }
1629
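/*
 * Replenish rx_free_thresh descriptors in one shot: bulk-allocate mbufs into
 * the S/W ring window that ends at the current free trigger, program their
 * DMA addresses into the matching descriptors and advance rx_free_trigger.
 */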
1630 static inline int
1631 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1632 {
1633         volatile union ixgbe_adv_rx_desc *rxdp;
1634         struct ixgbe_rx_entry *rxep;
1635         struct rte_mbuf *mb;
1636         uint16_t alloc_idx;
1637         __le64 dma_addr;
1638         int diag, i;
1639
1640         /* allocate buffers in bulk directly into the S/W ring */
1641         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1642         rxep = &rxq->sw_ring[alloc_idx];
1643         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1644                                     rxq->rx_free_thresh);
1645         if (unlikely(diag != 0))
1646                 return -ENOMEM;
1647
1648         rxdp = &rxq->rx_ring[alloc_idx];
1649         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1650                 /* populate the static rte mbuf fields */
1651                 mb = rxep[i].mbuf;
1652                 if (reset_mbuf) {
1653                         mb->port = rxq->port_id;
1654                 }
1655
1656                 rte_mbuf_refcnt_set(mb, 1);
1657                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1658
1659                 /* populate the descriptors */
1660                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1661                 rxdp[i].read.hdr_addr = 0;
1662                 rxdp[i].read.pkt_addr = dma_addr;
1663         }
1664
1665         /* update state of internal queue structure */
1666         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1667         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1668                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1669
1670         /* no errors */
1671         return 0;
1672 }
1673
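/*
 * Hand out up to nb_pkts of the previously staged mbufs to the caller and
 * update the stage bookkeeping (rx_nb_avail / rx_next_avail).
 */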
1674 static inline uint16_t
1675 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1676                          uint16_t nb_pkts)
1677 {
1678         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1679         int i;
1680
1681         /* how many packets are ready to return? */
1682         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1683
1684         /* copy mbuf pointers to the application's packet list */
1685         for (i = 0; i < nb_pkts; ++i)
1686                 rx_pkts[i] = stage[i];
1687
1688         /* update internal queue state */
1689         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1690         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1691
1692         return nb_pkts;
1693 }
1694
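/*
 * One pass of the bulk-allocation receive path: return staged packets first,
 * otherwise scan up to RTE_PMD_IXGBE_RX_MAX_BURST descriptors, replenish the
 * ring once the free trigger has been crossed and update the RDT register.
 */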
1695 static inline uint16_t
1696 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1697              uint16_t nb_pkts)
1698 {
1699         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1700         uint16_t nb_rx = 0;
1701
1702         /* Any previously recv'd pkts will be returned from the Rx stage */
1703         if (rxq->rx_nb_avail)
1704                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1705
1706         /* Scan the H/W ring for packets to receive */
1707         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1708
1709         /* update internal queue state */
1710         rxq->rx_next_avail = 0;
1711         rxq->rx_nb_avail = nb_rx;
1712         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1713
1714         /* if required, allocate new buffers to replenish descriptors */
1715         if (rxq->rx_tail > rxq->rx_free_trigger) {
1716                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1717
1718                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1719                         int i, j;
1720
1721                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1722                                    "queue_id=%u", (unsigned) rxq->port_id,
1723                                    (unsigned) rxq->queue_id);
1724
1725                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1726                                 rxq->rx_free_thresh;
1727
1728                         /*
1729                          * Need to rewind any previous receives if we cannot
1730                          * allocate new buffers to replenish the old ones.
1731                          */
1732                         rxq->rx_nb_avail = 0;
1733                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1734                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1735                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1736
1737                         return 0;
1738                 }
1739
1740                 /* update tail pointer */
1741                 rte_wmb();
1742                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1743                                             cur_free_trigger);
1744         }
1745
1746         if (rxq->rx_tail >= rxq->nb_rx_desc)
1747                 rxq->rx_tail = 0;
1748
1749         /* received any packets this loop? */
1750         if (rxq->rx_nb_avail)
1751                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1752
1753         return 0;
1754 }
1755
1756 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1757 uint16_t
1758 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1759                            uint16_t nb_pkts)
1760 {
1761         uint16_t nb_rx;
1762
1763         if (unlikely(nb_pkts == 0))
1764                 return 0;
1765
1766         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1767                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1768
1769         /* request is relatively large, chunk it up */
1770         nb_rx = 0;
1771         while (nb_pkts) {
1772                 uint16_t ret, n;
1773
1774                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1775                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1776                 nb_rx = (uint16_t)(nb_rx + ret);
1777                 nb_pkts = (uint16_t)(nb_pkts - ret);
1778                 if (ret < n)
1779                         break;
1780         }
1781
1782         return nb_rx;
1783 }
1784
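/*
 * Basic single-segment receive handler: one new mbuf is allocated per
 * processed descriptor, the received data is returned to the application in
 * the original mbuf, and the RDT register is lazily updated once more than
 * rx_free_thresh descriptors have been held back.
 */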
1785 uint16_t
1786 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1787                 uint16_t nb_pkts)
1788 {
1789         struct ixgbe_rx_queue *rxq;
1790         volatile union ixgbe_adv_rx_desc *rx_ring;
1791         volatile union ixgbe_adv_rx_desc *rxdp;
1792         struct ixgbe_rx_entry *sw_ring;
1793         struct ixgbe_rx_entry *rxe;
1794         struct rte_mbuf *rxm;
1795         struct rte_mbuf *nmb;
1796         union ixgbe_adv_rx_desc rxd;
1797         uint64_t dma_addr;
1798         uint32_t staterr;
1799         uint32_t pkt_info;
1800         uint16_t pkt_len;
1801         uint16_t rx_id;
1802         uint16_t nb_rx;
1803         uint16_t nb_hold;
1804         uint64_t pkt_flags;
1805         uint64_t vlan_flags;
1806
1807         nb_rx = 0;
1808         nb_hold = 0;
1809         rxq = rx_queue;
1810         rx_id = rxq->rx_tail;
1811         rx_ring = rxq->rx_ring;
1812         sw_ring = rxq->sw_ring;
1813         vlan_flags = rxq->vlan_flags;
1814         while (nb_rx < nb_pkts) {
1815                 /*
1816                  * The order of operations here is important as the DD status
1817                  * bit must not be read after any other descriptor fields.
1818                  * rx_ring and rxdp are pointing to volatile data so the order
1819                  * of accesses cannot be reordered by the compiler. If they were
1820                  * not volatile, they could be reordered which could lead to
1821                  * using invalid descriptor fields when read from rxd.
1822                  */
1823                 rxdp = &rx_ring[rx_id];
1824                 staterr = rxdp->wb.upper.status_error;
1825                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1826                         break;
1827                 rxd = *rxdp;
1828
1829                 /*
1830                  * End of packet.
1831                  *
1832                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1833                  * is likely to be invalid and to be dropped by the various
1834                  * validation checks performed by the network stack.
1835                  *
1836                  * Allocate a new mbuf to replenish the RX ring descriptor.
1837                  * If the allocation fails:
1838                  *    - arrange for that RX descriptor to be the first one
1839                  *      being parsed the next time the receive function is
1840                  *      invoked [on the same queue].
1841                  *
1842                  *    - Stop parsing the RX ring and return immediately.
1843                  *
1844                  * This policy does not drop the packet received in the RX
1845                  * descriptor for which the allocation of a new mbuf failed.
1846                  * Thus, it allows that packet to be retrieved later, once
1847                  * mbufs have been freed in the meantime.
1848                  * As a side effect, holding RX descriptors instead of
1849                  * systematically giving them back to the NIC may lead to
1850                  * RX ring exhaustion situations.
1851                  * However, the NIC can gracefully prevent such situations
1852                  * from happening by sending specific "back-pressure" flow
1853                  * control frames to its peer(s).
1854                  */
1855                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1856                            "ext_err_stat=0x%08x pkt_len=%u",
1857                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1858                            (unsigned) rx_id, (unsigned) staterr,
1859                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1860
1861                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1862                 if (nmb == NULL) {
1863                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1864                                    "queue_id=%u", (unsigned) rxq->port_id,
1865                                    (unsigned) rxq->queue_id);
1866                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1867                         break;
1868                 }
1869
1870                 nb_hold++;
1871                 rxe = &sw_ring[rx_id];
1872                 rx_id++;
1873                 if (rx_id == rxq->nb_rx_desc)
1874                         rx_id = 0;
1875
1876                 /* Prefetch next mbuf while processing current one. */
1877                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1878
1879                 /*
1880                  * When next RX descriptor is on a cache-line boundary,
1881                  * prefetch the next 4 RX descriptors and the next 8 pointers
1882                  * to mbufs.
1883                  */
1884                 if ((rx_id & 0x3) == 0) {
1885                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1886                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1887                 }
1888
1889                 rxm = rxe->mbuf;
1890                 rxe->mbuf = nmb;
1891                 dma_addr =
1892                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1893                 rxdp->read.hdr_addr = 0;
1894                 rxdp->read.pkt_addr = dma_addr;
1895
1896                 /*
1897                  * Initialize the returned mbuf.
1898                  * 1) setup generic mbuf fields:
1899                  *    - number of segments,
1900                  *    - next segment,
1901                  *    - packet length,
1902                  *    - RX port identifier.
1903                  * 2) integrate hardware offload data, if any:
1904                  *    - RSS flag & hash,
1905                  *    - IP checksum flag,
1906                  *    - VLAN TCI, if any,
1907                  *    - error flags.
1908                  */
1909                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1910                                       rxq->crc_len);
1911                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1912                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1913                 rxm->nb_segs = 1;
1914                 rxm->next = NULL;
1915                 rxm->pkt_len = pkt_len;
1916                 rxm->data_len = pkt_len;
1917                 rxm->port = rxq->port_id;
1918
1919                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1920                 /* Only valid if RTE_MBUF_F_RX_VLAN set in pkt_flags */
1921                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1922
1923                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1924                 pkt_flags = pkt_flags |
1925                         rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
1926                                                    rxq->rx_udp_csum_zero_err);
1927                 pkt_flags = pkt_flags |
1928                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1929                 rxm->ol_flags = pkt_flags;
1930                 rxm->packet_type =
1931                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1932                                                        rxq->pkt_type_mask);
1933
1934                 if (likely(pkt_flags & RTE_MBUF_F_RX_RSS_HASH))
1935                         rxm->hash.rss = rte_le_to_cpu_32(
1936                                                 rxd.wb.lower.hi_dword.rss);
1937                 else if (pkt_flags & RTE_MBUF_F_RX_FDIR) {
1938                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1939                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1940                                         IXGBE_ATR_HASH_MASK;
1941                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1942                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1943                 }
1944                 /*
1945                  * Store the mbuf address into the next entry of the array
1946                  * of returned packets.
1947                  */
1948                 rx_pkts[nb_rx++] = rxm;
1949         }
1950         rxq->rx_tail = rx_id;
1951
1952         /*
1953          * If the number of free RX descriptors is greater than the RX free
1954          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1955          * register.
1956          * Update the RDT with the value of the last processed RX descriptor
1957          * minus 1, to guarantee that the RDT register is never equal to the
1958          * RDH register, which creates a "full" ring situation from the
1959          * hardware point of view...
1960          */
1961         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1962         if (nb_hold > rxq->rx_free_thresh) {
1963                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1964                            "nb_hold=%u nb_rx=%u",
1965                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1966                            (unsigned) rx_id, (unsigned) nb_hold,
1967                            (unsigned) nb_rx);
1968                 rx_id = (uint16_t) ((rx_id == 0) ?
1969                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1970                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1971                 nb_hold = 0;
1972         }
1973         rxq->nb_rx_hold = nb_hold;
1974         return nb_rx;
1975 }
1976
1977 /**
1978  * Detect an RSC descriptor.
1979  */
1980 static inline uint32_t
1981 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1982 {
1983         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1984                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1985 }
1986
1987 /**
1988  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1989  *
1990  * Fill the following info in the HEAD buffer of the Rx cluster:
1991  *    - RX port identifier
1992  *    - hardware offload data, if any:
1993  *      - RSS flag & hash
1994  *      - IP checksum flag
1995  *      - VLAN TCI, if any
1996  *      - error flags
1997  * @head HEAD of the packet cluster
1998  * @desc HW descriptor to get data from
1999  * @rxq Pointer to the Rx queue
2000  */
2001 static inline void
2002 ixgbe_fill_cluster_head_buf(
2003         struct rte_mbuf *head,
2004         union ixgbe_adv_rx_desc *desc,
2005         struct ixgbe_rx_queue *rxq,
2006         uint32_t staterr)
2007 {
2008         uint32_t pkt_info;
2009         uint64_t pkt_flags;
2010
2011         head->port = rxq->port_id;
2012
2013         /* The vlan_tci field is only valid when RTE_MBUF_F_RX_VLAN is
2014          * set in the pkt_flags field.
2015          */
2016         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2017         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2018         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2019         pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2020                                                 rxq->rx_udp_csum_zero_err);
2021         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2022         head->ol_flags = pkt_flags;
2023         head->packet_type =
2024                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2025
2026         if (likely(pkt_flags & RTE_MBUF_F_RX_RSS_HASH))
2027                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2028         else if (pkt_flags & RTE_MBUF_F_RX_FDIR) {
2029                 head->hash.fdir.hash =
2030                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2031                                                           & IXGBE_ATR_HASH_MASK;
2032                 head->hash.fdir.id =
2033                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2034         }
2035 }
2036
2037 /**
2038  * ixgbe_recv_pkts_lro - receive handler for scattered and LRO cases.
2039  *
2040  * @rx_queue Rx queue handle
2041  * @rx_pkts table of received packets
2042  * @nb_pkts size of rx_pkts table
2043  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2044  *
2045  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2046  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2047  *
2048  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2049  * 1) When non-EOP RSC completion arrives:
2050  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2051  *       segment's data length.
2052  *    b) Set the "next" pointer of the current segment to point to the segment
2053  *       at the NEXTP index.
2054  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2055  *       in the sw_sc_ring.
2056  * 2) When EOP arrives we just update the cluster's total length and offload
2057  *    flags and deliver the cluster up to the upper layers. In our case - put it
2058  *    in the rx_pkts table.
2059  *
2060  * Returns the number of received packets/clusters (according to the "bulk
2061  * receive" interface).
2062  */
2063 static inline uint16_t
2064 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2065                     bool bulk_alloc)
2066 {
2067         struct ixgbe_rx_queue *rxq = rx_queue;
2068         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2069         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2070         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2071         uint16_t rx_id = rxq->rx_tail;
2072         uint16_t nb_rx = 0;
2073         uint16_t nb_hold = rxq->nb_rx_hold;
2074         uint16_t prev_id = rxq->rx_tail;
2075
2076         while (nb_rx < nb_pkts) {
2077                 bool eop;
2078                 struct ixgbe_rx_entry *rxe;
2079                 struct ixgbe_scattered_rx_entry *sc_entry;
2080                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2081                 struct ixgbe_rx_entry *next_rxe = NULL;
2082                 struct rte_mbuf *first_seg;
2083                 struct rte_mbuf *rxm;
2084                 struct rte_mbuf *nmb = NULL;
2085                 union ixgbe_adv_rx_desc rxd;
2086                 uint16_t data_len;
2087                 uint16_t next_id;
2088                 volatile union ixgbe_adv_rx_desc *rxdp;
2089                 uint32_t staterr;
2090
2091 next_desc:
2092                 /*
2093                  * The code in this whole file uses the volatile pointer to
2094                  * ensure the read ordering of the status and the rest of the
2095                  * descriptor fields (on the compiler level only!!!). This is so
2096                  * UGLY - why not just use the compiler barrier instead? DPDK
2097                  * even has the rte_compiler_barrier() for that.
2098                  *
2099                  * But most importantly this is just wrong because this doesn't
2100                  * ensure memory ordering in a general case at all. For
2101                  * instance, DPDK is supposed to work on Power CPUs where
2102                  * compiler barrier may just not be enough!
2103                  *
2104                  * I tried to write only this function properly to have a
2105                  * starting point (as a part of an LRO/RSC series) but the
2106                  * compiler cursed at me when I tried to cast away the
2107                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2108                  * keeping it the way it is for now.
2109                  *
2110                  * The code in this file is broken in so many other places and
2111                  * will just not work on a big-endian CPU anyway; therefore the
2112                  * lines below will have to be revisited together with the rest
2113                  * of the ixgbe PMD.
2114                  *
2115                  * TODO:
2116                  *    - Get rid of "volatile" and let the compiler do its job.
2117                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2118                  *      memory ordering below.
2119                  */
2120                 rxdp = &rx_ring[rx_id];
2121                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2122
2123                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2124                         break;
2125
2126                 rxd = *rxdp;
2127
2128                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2129                                   "staterr=0x%x data_len=%u",
2130                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2131                            rte_le_to_cpu_16(rxd.wb.upper.length));
2132
2133                 if (!bulk_alloc) {
2134                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2135                         if (nmb == NULL) {
2136                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2137                                                   "port_id=%u queue_id=%u",
2138                                            rxq->port_id, rxq->queue_id);
2139
2140                                 rte_eth_devices[rxq->port_id].data->
2141                                                         rx_mbuf_alloc_failed++;
2142                                 break;
2143                         }
2144                 } else if (nb_hold > rxq->rx_free_thresh) {
2145                         uint16_t next_rdt = rxq->rx_free_trigger;
2146
2147                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2148                                 rte_wmb();
2149                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2150                                                         rxq->rdt_reg_addr,
2151                                                         next_rdt);
2152                                 nb_hold -= rxq->rx_free_thresh;
2153                         } else {
2154                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2155                                                   "port_id=%u queue_id=%u",
2156                                            rxq->port_id, rxq->queue_id);
2157
2158                                 rte_eth_devices[rxq->port_id].data->
2159                                                         rx_mbuf_alloc_failed++;
2160                                 break;
2161                         }
2162                 }
2163
2164                 nb_hold++;
2165                 rxe = &sw_ring[rx_id];
2166                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2167
2168                 next_id = rx_id + 1;
2169                 if (next_id == rxq->nb_rx_desc)
2170                         next_id = 0;
2171
2172                 /* Prefetch next mbuf while processing current one. */
2173                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2174
2175                 /*
2176                  * When next RX descriptor is on a cache-line boundary,
2177                  * prefetch the next 4 RX descriptors and the next 8 pointers
2178                  * to mbufs.
2179                  */
2180                 if ((next_id & 0x3) == 0) {
2181                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2182                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2183                 }
2184
2185                 rxm = rxe->mbuf;
2186
2187                 if (!bulk_alloc) {
2188                         __le64 dma =
2189                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2190                         /*
2191                          * Update RX descriptor with the physical address of the
2192                          * new data buffer of the newly allocated mbuf.
2193                          */
2194                         rxe->mbuf = nmb;
2195
2196                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2197                         rxdp->read.hdr_addr = 0;
2198                         rxdp->read.pkt_addr = dma;
2199                 } else
2200                         rxe->mbuf = NULL;
2201
2202                 /*
2203                  * Set data length & data buffer address of mbuf.
2204                  */
2205                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2206                 rxm->data_len = data_len;
2207
2208                 if (!eop) {
2209                         uint16_t nextp_id;
2210                         /*
2211                          * Get next descriptor index:
2212                          *  - For RSC it's in the NEXTP field.
2213                          *  - For a scattered packet - it's just a following
2214                          *    descriptor.
2215                          */
2216                         if (ixgbe_rsc_count(&rxd))
2217                                 nextp_id =
2218                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2219                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2220                         else
2221                                 nextp_id = next_id;
2222
2223                         next_sc_entry = &sw_sc_ring[nextp_id];
2224                         next_rxe = &sw_ring[nextp_id];
2225                         rte_ixgbe_prefetch(next_rxe);
2226                 }
2227
2228                 sc_entry = &sw_sc_ring[rx_id];
2229                 first_seg = sc_entry->fbuf;
2230                 sc_entry->fbuf = NULL;
2231
2232                 /*
2233                  * If this is the first buffer of the received packet,
2234                  * set the pointer to the first mbuf of the packet and
2235                  * initialize its context.
2236                  * Otherwise, update the total length and the number of segments
2237                  * of the current scattered packet, and update the pointer to
2238                  * the last mbuf of the current packet.
2239                  */
2240                 if (first_seg == NULL) {
2241                         first_seg = rxm;
2242                         first_seg->pkt_len = data_len;
2243                         first_seg->nb_segs = 1;
2244                 } else {
2245                         first_seg->pkt_len += data_len;
2246                         first_seg->nb_segs++;
2247                 }
2248
2249                 prev_id = rx_id;
2250                 rx_id = next_id;
2251
2252                 /*
2253                  * If this is not the last buffer of the received packet, update
2254                  * the pointer to the first mbuf at the NEXTP entry in the
2255                  * sw_sc_ring and continue to parse the RX ring.
2256                  */
2257                 if (!eop && next_rxe) {
2258                         rxm->next = next_rxe->mbuf;
2259                         next_sc_entry->fbuf = first_seg;
2260                         goto next_desc;
2261                 }
2262
2263                 /* Initialize the first mbuf of the returned packet */
2264                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2265
2266                 /*
2267                  * Deal with the case when HW CRC strip is disabled.
2268                  * That can't happen when LRO is enabled, but it could still
2269                  * happen for scattered RX mode.
2270                  */
2271                 first_seg->pkt_len -= rxq->crc_len;
2272                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2273                         struct rte_mbuf *lp;
2274
2275                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2276                                 ;
2277
2278                         first_seg->nb_segs--;
2279                         lp->data_len -= rxq->crc_len - rxm->data_len;
2280                         lp->next = NULL;
2281                         rte_pktmbuf_free_seg(rxm);
2282                 } else
2283                         rxm->data_len -= rxq->crc_len;
2284
2285                 /* Prefetch data of first segment, if configured to do so. */
2286                 rte_packet_prefetch((char *)first_seg->buf_addr +
2287                         first_seg->data_off);
2288
2289                 /*
2290                  * Store the mbuf address into the next entry of the array
2291                  * of returned packets.
2292                  */
2293                 rx_pkts[nb_rx++] = first_seg;
2294         }
2295
2296         /*
2297          * Record index of the next RX descriptor to probe.
2298          */
2299         rxq->rx_tail = rx_id;
2300
2301         /*
2302          * If the number of free RX descriptors is greater than the RX free
2303          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2304          * register.
2305          * Update the RDT with the value of the last processed RX descriptor
2306          * minus 1, to guarantee that the RDT register is never equal to the
2307          * RDH register, which creates a "full" ring situation from the
2308          * hardware point of view...
2309          */
2310         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2311                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2312                            "nb_hold=%u nb_rx=%u",
2313                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2314
2315                 rte_wmb();
2316                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2317                 nb_hold = 0;
2318         }
2319
2320         rxq->nb_rx_hold = nb_hold;
2321         return nb_rx;
2322 }
2323
2324 uint16_t
2325 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2326                                  uint16_t nb_pkts)
2327 {
2328         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2329 }
2330
2331 uint16_t
2332 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2333                                uint16_t nb_pkts)
2334 {
2335         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2336 }
2337
2338 /*********************************************************************
2339  *
2340  *  Queue management functions
2341  *
2342  **********************************************************************/
2343
2344 static void __rte_cold
2345 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2346 {
2347         unsigned i;
2348
2349         if (txq->sw_ring != NULL) {
2350                 for (i = 0; i < txq->nb_tx_desc; i++) {
2351                         if (txq->sw_ring[i].mbuf != NULL) {
2352                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2353                                 txq->sw_ring[i].mbuf = NULL;
2354                         }
2355                 }
2356         }
2357 }
2358
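/*
 * Free transmitted mbufs on the full-featured Tx path, up to free_cnt packets
 * (or the whole ring when free_cnt is 0): walk the S/W ring from the entry
 * following tx_tail and trigger ixgbe_xmit_cleanup() as descriptors complete.
 * Returns the number of packets actually freed.
 */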
2359 static int
2360 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2361 {
2362         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2363         uint16_t i, tx_last, tx_id;
2364         uint16_t nb_tx_free_last;
2365         uint16_t nb_tx_to_clean;
2366         uint32_t pkt_cnt;
2367
2368         /* Start freeing mbufs from the entry following tx_tail */
2369         tx_last = txq->tx_tail;
2370         tx_id  = swr_ring[tx_last].next_id;
2371
2372         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2373                 return 0;
2374
2375         nb_tx_to_clean = txq->nb_tx_free;
2376         nb_tx_free_last = txq->nb_tx_free;
2377         if (!free_cnt)
2378                 free_cnt = txq->nb_tx_desc;
2379
2380         /* Loop through swr_ring to count the number of
2381          * freeable mbufs and packets.
2382          */
2383         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2384                 for (i = 0; i < nb_tx_to_clean &&
2385                         pkt_cnt < free_cnt &&
2386                         tx_id != tx_last; i++) {
2387                         if (swr_ring[tx_id].mbuf != NULL) {
2388                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2389                                 swr_ring[tx_id].mbuf = NULL;
2390
2391                                 /*
2392                                  * last segment in the packet,
2393                                  * increment packet count
2394                                  */
2395                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2396                         }
2397
2398                         tx_id = swr_ring[tx_id].next_id;
2399                 }
2400
2401                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2402                         txq->nb_tx_free || tx_id == tx_last)
2403                         break;
2404
2405                 if (pkt_cnt < free_cnt) {
2406                         if (ixgbe_xmit_cleanup(txq))
2407                                 break;
2408
2409                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2410                         nb_tx_free_last = txq->nb_tx_free;
2411                 }
2412         }
2413
2414         return (int)pkt_cnt;
2415 }
2416
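/*
 * Tx-done cleanup for the simple (tx_rs_thresh based) path: free completed
 * mbufs in multiples of tx_rs_thresh via ixgbe_tx_free_bufs() and return the
 * number of entries cleaned.
 */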
2417 static int
2418 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2419                         uint32_t free_cnt)
2420 {
2421         int i, n, cnt;
2422
2423         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2424                 free_cnt = txq->nb_tx_desc;
2425
2426         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2427
2428         for (i = 0; i < cnt; i += n) {
2429                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2430                         break;
2431
2432                 n = ixgbe_tx_free_bufs(txq);
2433
2434                 if (n == 0)
2435                         break;
2436         }
2437
2438         return i;
2439 }
2440
2441 static int
2442 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2443                         uint32_t free_cnt __rte_unused)
2444 {
2445         return -ENOTSUP;
2446 }
2447
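/*
 * Dispatch the tx_done_cleanup request to the variant matching the Tx path in
 * use (vector, simple or full-featured), using checks analogous to the Tx
 * function selection done in ixgbe_set_tx_function() below.
 */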
2448 int
2449 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2450 {
2451         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2452         if (txq->offloads == 0 &&
2453 #ifdef RTE_LIB_SECURITY
2454                         !(txq->using_ipsec) &&
2455 #endif
2456                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2457                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2458                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2459                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2460                                         txq->sw_ring_v != NULL)) {
2461                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2462                 } else {
2463                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2464                 }
2465         }
2466
2467         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2468 }
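/*
 * Editor's note (usage sketch, not part of the driver): applications
 * normally reach this handler through the generic ethdev API, e.g.
 *
 *     ret = rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *
 * where a free_cnt of 0 requests that as many completed descriptors as
 * possible be cleaned (treated above as nb_tx_desc).
 */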
2469
2470 static void __rte_cold
2471 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2472 {
2473         if (txq != NULL &&
2474             txq->sw_ring != NULL)
2475                 rte_free(txq->sw_ring);
2476 }
2477
2478 static void __rte_cold
2479 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2480 {
2481         if (txq != NULL && txq->ops != NULL) {
2482                 txq->ops->release_mbufs(txq);
2483                 txq->ops->free_swring(txq);
2484                 rte_free(txq);
2485         }
2486 }
2487
2488 void __rte_cold
2489 ixgbe_dev_tx_queue_release(void *txq)
2490 {
2491         ixgbe_tx_queue_release(txq);
2492 }
2493
2494 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2495 static void __rte_cold
2496 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2497 {
2498         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2499         struct ixgbe_tx_entry *txe = txq->sw_ring;
2500         uint16_t prev, i;
2501
2502         /* Zero out HW ring memory */
2503         for (i = 0; i < txq->nb_tx_desc; i++) {
2504                 txq->tx_ring[i] = zeroed_desc;
2505         }
2506
2507         /* Initialize SW ring entries */
2508         prev = (uint16_t) (txq->nb_tx_desc - 1);
2509         for (i = 0; i < txq->nb_tx_desc; i++) {
2510                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2511
2512                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2513                 txe[i].mbuf = NULL;
2514                 txe[i].last_id = i;
2515                 txe[prev].next_id = i;
2516                 prev = i;
2517         }
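        /*
         * Editor's note (illustrative): with nb_tx_desc = 4 the loop above
         * chains the software ring into a circle via next_id
         * (0 -> 1 -> 2 -> 3 -> 0), and every descriptor starts with its DD
         * bit set so the queue initially appears fully transmitted.
         */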
2518
2519         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2520         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2521
2522         txq->tx_tail = 0;
2523         txq->nb_tx_used = 0;
2524         /*
2525          * Always allow 1 descriptor to be unallocated to avoid
2526          * a H/W race condition.
2527          */
2528         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2529         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2530         txq->ctx_curr = 0;
2531         memset((void *)&txq->ctx_cache, 0,
2532                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2533 }
2534
2535 static const struct ixgbe_txq_ops def_txq_ops = {
2536         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2537         .free_swring = ixgbe_tx_free_swring,
2538         .reset = ixgbe_reset_tx_queue,
2539 };
2540
2541 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2542  * the queue parameters. Used in tx_queue_setup by the primary process and
2543  * then in dev_init by a secondary process attaching to an existing ethdev.
2544  */
2545 void __rte_cold
2546 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2547 {
2548         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2549         if ((txq->offloads == 0) &&
2550 #ifdef RTE_LIB_SECURITY
2551                         !(txq->using_ipsec) &&
2552 #endif
2553                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2554                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2555                 dev->tx_pkt_prepare = NULL;
2556                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2557                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2558                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2559                                         ixgbe_txq_vec_setup(txq) == 0)) {
2560                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2561                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2562                 } else
2563                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2564         } else {
2565                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2566                 PMD_INIT_LOG(DEBUG,
2567                                 " - offloads = 0x%" PRIx64,
2568                                 txq->offloads);
2569                 PMD_INIT_LOG(DEBUG,
2570                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2571                                 (unsigned long)txq->tx_rs_thresh,
2572                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2573                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2574                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2575         }
2576 }
2577
2578 uint64_t
2579 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2580 {
2581         RTE_SET_USED(dev);
2582
2583         return 0;
2584 }
2585
2586 uint64_t
2587 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2588 {
2589         uint64_t tx_offload_capa;
2590         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2591
2592         tx_offload_capa =
2593                 DEV_TX_OFFLOAD_VLAN_INSERT |
2594                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2595                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2596                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2597                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2598                 DEV_TX_OFFLOAD_TCP_TSO     |
2599                 DEV_TX_OFFLOAD_MULTI_SEGS;
2600
2601         if (hw->mac.type == ixgbe_mac_82599EB ||
2602             hw->mac.type == ixgbe_mac_X540)
2603                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2604
2605         if (hw->mac.type == ixgbe_mac_X550 ||
2606             hw->mac.type == ixgbe_mac_X550EM_x ||
2607             hw->mac.type == ixgbe_mac_X550EM_a)
2608                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2609
2610 #ifdef RTE_LIB_SECURITY
2611         if (dev->security_ctx)
2612                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2613 #endif
2614         return tx_offload_capa;
2615 }
2616
2617 int __rte_cold
2618 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2619                          uint16_t queue_idx,
2620                          uint16_t nb_desc,
2621                          unsigned int socket_id,
2622                          const struct rte_eth_txconf *tx_conf)
2623 {
2624         const struct rte_memzone *tz;
2625         struct ixgbe_tx_queue *txq;
2626         struct ixgbe_hw     *hw;
2627         uint16_t tx_rs_thresh, tx_free_thresh;
2628         uint64_t offloads;
2629
2630         PMD_INIT_FUNC_TRACE();
2631         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2632
2633         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2634
2635         /*
2636          * Validate number of transmit descriptors.
2637          * It must not exceed hardware maximum, and must be multiple
2638          * of IXGBE_ALIGN.
2639          */
2640         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2641                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2642                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2643                 return -EINVAL;
2644         }
2645
2646         /*
2647          * The following two parameters control the setting of the RS bit on
2648          * transmit descriptors.
2649          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2650          * descriptors have been used.
2651          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2652          * descriptors are used or if the number of descriptors required
2653          * to transmit a packet is greater than the number of free TX
2654          * descriptors.
2655          * The following constraints must be satisfied:
2656          *  tx_rs_thresh must be greater than 0.
2657          *  tx_rs_thresh must be less than the size of the ring minus 2.
2658          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2659          *  tx_rs_thresh must be a divisor of the ring size.
2660          *  tx_free_thresh must be greater than 0.
2661          *  tx_free_thresh must be less than the size of the ring minus 3.
2662          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2663          * One descriptor in the TX ring is used as a sentinel to avoid a
2664          * H/W race condition, hence the maximum threshold constraints.
2665          * When set to zero use default values.
2666          */
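        /*
         * Editor's note (illustrative): nb_desc = 512 with tx_rs_thresh = 32
         * and tx_free_thresh = 32 satisfies every constraint above:
         * 32 > 0, 32 < 510, 32 <= 32, 512 % 32 == 0, 32 < 509 and
         * 32 + 32 <= 512.
         */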
2667         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2668                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2669         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2670         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2671                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2672         if (tx_conf->tx_rs_thresh > 0)
2673                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2674         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2675                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2676                              "exceed nb_desc. (tx_rs_thresh=%u "
2677                              "tx_free_thresh=%u nb_desc=%u port=%d queue=%d)",
2678                              (unsigned int)tx_rs_thresh,
2679                              (unsigned int)tx_free_thresh,
2680                              (unsigned int)nb_desc,
2681                              (int)dev->data->port_id,
2682                              (int)queue_idx);
2683                 return -(EINVAL);
2684         }
2685         if (tx_rs_thresh >= (nb_desc - 2)) {
2686                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2687                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2688                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2689                         (int)dev->data->port_id, (int)queue_idx);
2690                 return -(EINVAL);
2691         }
2692         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2693                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2694                         "(tx_rs_thresh=%u port=%d queue=%d)",
2695                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2696                         (int)dev->data->port_id, (int)queue_idx);
2697                 return -(EINVAL);
2698         }
2699         if (tx_free_thresh >= (nb_desc - 3)) {
2700                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2701                              "tx_free_thresh must be less than the number of "
2702                              "TX descriptors minus 3. (tx_free_thresh=%u "
2703                              "port=%d queue=%d)",
2704                              (unsigned int)tx_free_thresh,
2705                              (int)dev->data->port_id, (int)queue_idx);
2706                 return -(EINVAL);
2707         }
2708         if (tx_rs_thresh > tx_free_thresh) {
2709                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2710                              "tx_free_thresh. (tx_free_thresh=%u "
2711                              "tx_rs_thresh=%u port=%d queue=%d)",
2712                              (unsigned int)tx_free_thresh,
2713                              (unsigned int)tx_rs_thresh,
2714                              (int)dev->data->port_id,
2715                              (int)queue_idx);
2716                 return -(EINVAL);
2717         }
2718         if ((nb_desc % tx_rs_thresh) != 0) {
2719                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2720                              "number of TX descriptors. (tx_rs_thresh=%u "
2721                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2722                              (int)dev->data->port_id, (int)queue_idx);
2723                 return -(EINVAL);
2724         }
2725
2726         /*
2727          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2728          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2729          * by the NIC and all descriptors are written back after the NIC
2730          * accumulates WTHRESH descriptors.
2731          */
2732         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2733                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2734                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2735                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2736                              (int)dev->data->port_id, (int)queue_idx);
2737                 return -(EINVAL);
2738         }
2739
2740         /* Free memory prior to re-allocation if needed... */
2741         if (dev->data->tx_queues[queue_idx] != NULL) {
2742                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2743                 dev->data->tx_queues[queue_idx] = NULL;
2744         }
2745
2746         /* First allocate the tx queue data structure */
2747         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2748                                  RTE_CACHE_LINE_SIZE, socket_id);
2749         if (txq == NULL)
2750                 return -ENOMEM;
2751
2752         /*
2753          * Allocate TX ring hardware descriptors. A memzone large enough to
2754          * handle the maximum ring size is allocated in order to allow for
2755          * resizing in later calls to the queue setup function.
2756          */
2757         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2758                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2759                         IXGBE_ALIGN, socket_id);
2760         if (tz == NULL) {
2761                 ixgbe_tx_queue_release(txq);
2762                 return -ENOMEM;
2763         }
2764
2765         txq->nb_tx_desc = nb_desc;
2766         txq->tx_rs_thresh = tx_rs_thresh;
2767         txq->tx_free_thresh = tx_free_thresh;
2768         txq->pthresh = tx_conf->tx_thresh.pthresh;
2769         txq->hthresh = tx_conf->tx_thresh.hthresh;
2770         txq->wthresh = tx_conf->tx_thresh.wthresh;
2771         txq->queue_id = queue_idx;
2772         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2773                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2774         txq->port_id = dev->data->port_id;
2775         txq->offloads = offloads;
2776         txq->ops = &def_txq_ops;
2777         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2778 #ifdef RTE_LIB_SECURITY
2779         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2780                         DEV_TX_OFFLOAD_SECURITY);
2781 #endif
2782
2783         /*
2784          * Set VFTDT as the TX tail register when a virtual function is detected
2785          */
2786         if (hw->mac.type == ixgbe_mac_82599_vf ||
2787             hw->mac.type == ixgbe_mac_X540_vf ||
2788             hw->mac.type == ixgbe_mac_X550_vf ||
2789             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2790             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2791                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2792         else
2793                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2794
2795         txq->tx_ring_phys_addr = tz->iova;
2796         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2797
2798         /* Allocate software ring */
2799         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2800                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2801                                 RTE_CACHE_LINE_SIZE, socket_id);
2802         if (txq->sw_ring == NULL) {
2803                 ixgbe_tx_queue_release(txq);
2804                 return -ENOMEM;
2805         }
2806         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2807                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2808
2809         /* set up vector or scalar TX function as appropriate */
2810         ixgbe_set_tx_function(dev, txq);
2811
2812         txq->ops->reset(txq);
2813
2814         dev->data->tx_queues[queue_idx] = txq;
2815
2816
2817         return 0;
2818 }
2819
2820 /**
2821  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2822  *
2823  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2824  * in the sw_rsc_ring is not set to NULL but rather points to the next
2825  * mbuf of this RSC aggregation (that has not been completed yet and still
2826  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2827  * just free the first "nb_segs" segments of the cluster explicitly by
2828  * calling rte_pktmbuf_free_seg() on each of them.
2829  *
2830  * @m scattered cluster head
2831  */
2832 static void __rte_cold
2833 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2834 {
2835         uint16_t i, nb_segs = m->nb_segs;
2836         struct rte_mbuf *next_seg;
2837
2838         for (i = 0; i < nb_segs; i++) {
2839                 next_seg = m->next;
2840                 rte_pktmbuf_free_seg(m);
2841                 m = next_seg;
2842         }
2843 }
2844
2845 static void __rte_cold
2846 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2847 {
2848         unsigned i;
2849
2850         /* SSE Vector driver has a different way of releasing mbufs. */
2851         if (rxq->rx_using_sse) {
2852                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2853                 return;
2854         }
2855
2856         if (rxq->sw_ring != NULL) {
2857                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2858                         if (rxq->sw_ring[i].mbuf != NULL) {
2859                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2860                                 rxq->sw_ring[i].mbuf = NULL;
2861                         }
2862                 }
2863                 if (rxq->rx_nb_avail) {
2864                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2865                                 struct rte_mbuf *mb;
2866
2867                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2868                                 rte_pktmbuf_free_seg(mb);
2869                         }
2870                         rxq->rx_nb_avail = 0;
2871                 }
2872         }
2873
2874         if (rxq->sw_sc_ring)
2875                 for (i = 0; i < rxq->nb_rx_desc; i++)
2876                         if (rxq->sw_sc_ring[i].fbuf) {
2877                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2878                                 rxq->sw_sc_ring[i].fbuf = NULL;
2879                         }
2880 }
2881
2882 static void __rte_cold
2883 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2884 {
2885         if (rxq != NULL) {
2886                 ixgbe_rx_queue_release_mbufs(rxq);
2887                 rte_free(rxq->sw_ring);
2888                 rte_free(rxq->sw_sc_ring);
2889                 rte_free(rxq);
2890         }
2891 }
2892
2893 void __rte_cold
2894 ixgbe_dev_rx_queue_release(void *rxq)
2895 {
2896         ixgbe_rx_queue_release(rxq);
2897 }
2898
2899 /*
2900  * Check if Rx Burst Bulk Alloc function can be used.
2901  * Return
2902  *        0: the preconditions are satisfied and the bulk allocation function
2903  *           can be used.
2904  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2905  *           function must be used.
2906  */
2907 static inline int __rte_cold
2908 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2909 {
2910         int ret = 0;
2911
2912         /*
2913          * Make sure the following pre-conditions are satisfied:
2914          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2915          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2916          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2917          * Scattered packets are not supported.  This should be checked
2918          * outside of this function.
2919          */
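        /*
         * Editor's note (illustrative): nb_rx_desc = 512 with
         * rx_free_thresh = 64 meets all three conditions, provided
         * RTE_PMD_IXGBE_RX_MAX_BURST is not larger than 64:
         * 64 >= RTE_PMD_IXGBE_RX_MAX_BURST, 64 < 512 and 512 % 64 == 0.
         */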
2920         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2921                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2922                              "rxq->rx_free_thresh=%d, "
2923                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2924                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2925                 ret = -EINVAL;
2926         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2927                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2928                              "rxq->rx_free_thresh=%d, "
2929                              "rxq->nb_rx_desc=%d",
2930                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2931                 ret = -EINVAL;
2932         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2933                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2934                              "rxq->nb_rx_desc=%d, "
2935                              "rxq->rx_free_thresh=%d",
2936                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2937                 ret = -EINVAL;
2938         }
2939
2940         return ret;
2941 }
2942
2943 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2944 static void __rte_cold
2945 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2946 {
2947         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2948         unsigned i;
2949         uint16_t len = rxq->nb_rx_desc;
2950
2951         /*
2952          * By default, the Rx queue setup function allocates enough memory for
2953          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2954          * extra memory at the end of the descriptor ring to be zeroed out.
2955          */
2956         if (adapter->rx_bulk_alloc_allowed)
2957                 /* zero out extra memory */
2958                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2959
2960         /*
2961          * Zero out HW ring memory. Zero out extra memory at the end of
2962          * the H/W ring so that the look-ahead logic in the Rx Burst bulk
2963          * alloc function reads the extra memory as zeros.
2964          */
2965         for (i = 0; i < len; i++) {
2966                 rxq->rx_ring[i] = zeroed_desc;
2967         }
2968
2969         /*
2970          * Initialize extra software ring entries. Space for these extra
2971          * entries is always allocated.
2972          */
2973         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2974         for (i = rxq->nb_rx_desc; i < len; ++i) {
2975                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2976         }
2977
2978         rxq->rx_nb_avail = 0;
2979         rxq->rx_next_avail = 0;
2980         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2981         rxq->rx_tail = 0;
2982         rxq->nb_rx_hold = 0;
2983         rxq->pkt_first_seg = NULL;
2984         rxq->pkt_last_seg = NULL;
2985
2986 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2987         rxq->rxrearm_start = 0;
2988         rxq->rxrearm_nb = 0;
2989 #endif
2990 }
2991
2992 static int
2993 ixgbe_is_vf(struct rte_eth_dev *dev)
2994 {
2995         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2996
2997         switch (hw->mac.type) {
2998         case ixgbe_mac_82599_vf:
2999         case ixgbe_mac_X540_vf:
3000         case ixgbe_mac_X550_vf:
3001         case ixgbe_mac_X550EM_x_vf:
3002         case ixgbe_mac_X550EM_a_vf:
3003                 return 1;
3004         default:
3005                 return 0;
3006         }
3007 }
3008
3009 uint64_t
3010 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3011 {
3012         uint64_t offloads = 0;
3013         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3014
3015         if (hw->mac.type != ixgbe_mac_82598EB)
3016                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3017
3018         return offloads;
3019 }
3020
3021 uint64_t
3022 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3023 {
3024         uint64_t offloads;
3025         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3026
3027         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
3028                    DEV_RX_OFFLOAD_UDP_CKSUM   |
3029                    DEV_RX_OFFLOAD_TCP_CKSUM   |
3030                    DEV_RX_OFFLOAD_KEEP_CRC    |
3031                    DEV_RX_OFFLOAD_JUMBO_FRAME |
3032                    DEV_RX_OFFLOAD_VLAN_FILTER |
3033                    DEV_RX_OFFLOAD_SCATTER |
3034                    DEV_RX_OFFLOAD_RSS_HASH;
3035
3036         if (hw->mac.type == ixgbe_mac_82598EB)
3037                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3038
3039         if (ixgbe_is_vf(dev) == 0)
3040                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
3041
3042         /*
3043          * RSC is only supported by 82599, X540 and X550 PF devices in
3044          * non-SR-IOV mode.
3045          */
3046         if ((hw->mac.type == ixgbe_mac_82599EB ||
3047              hw->mac.type == ixgbe_mac_X540 ||
3048              hw->mac.type == ixgbe_mac_X550) &&
3049             !RTE_ETH_DEV_SRIOV(dev).active)
3050                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3051
3052         if (hw->mac.type == ixgbe_mac_82599EB ||
3053             hw->mac.type == ixgbe_mac_X540)
3054                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3055
3056         if (hw->mac.type == ixgbe_mac_X550 ||
3057             hw->mac.type == ixgbe_mac_X550EM_x ||
3058             hw->mac.type == ixgbe_mac_X550EM_a)
3059                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3060
3061 #ifdef RTE_LIB_SECURITY
3062         if (dev->security_ctx)
3063                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3064 #endif
3065
3066         return offloads;
3067 }
3068
3069 int __rte_cold
3070 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3071                          uint16_t queue_idx,
3072                          uint16_t nb_desc,
3073                          unsigned int socket_id,
3074                          const struct rte_eth_rxconf *rx_conf,
3075                          struct rte_mempool *mp)
3076 {
3077         const struct rte_memzone *rz;
3078         struct ixgbe_rx_queue *rxq;
3079         struct ixgbe_hw     *hw;
3080         uint16_t len;
3081         struct ixgbe_adapter *adapter = dev->data->dev_private;
3082         uint64_t offloads;
3083
3084         PMD_INIT_FUNC_TRACE();
3085         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3086
3087         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3088
3089         /*
3090          * Validate number of receive descriptors.
3091          * It must not exceed hardware maximum, and must be multiple
3092          * of IXGBE_ALIGN.
3093          */
3094         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3095                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3096                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3097                 return -EINVAL;
3098         }
3099
3100         /* Free memory prior to re-allocation if needed... */
3101         if (dev->data->rx_queues[queue_idx] != NULL) {
3102                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3103                 dev->data->rx_queues[queue_idx] = NULL;
3104         }
3105
3106         /* First allocate the rx queue data structure */
3107         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3108                                  RTE_CACHE_LINE_SIZE, socket_id);
3109         if (rxq == NULL)
3110                 return -ENOMEM;
3111         rxq->mb_pool = mp;
3112         rxq->nb_rx_desc = nb_desc;
3113         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3114         rxq->queue_id = queue_idx;
3115         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3116                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3117         rxq->port_id = dev->data->port_id;
3118         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3119                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3120         else
3121                 rxq->crc_len = 0;
3122         rxq->drop_en = rx_conf->rx_drop_en;
3123         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3124         rxq->offloads = offloads;
3125
3126         /*
3127          * The packet type field in the RX descriptor differs between NICs:
3128          * some bits are used on x550 but reserved on other NICs,
3129          * so set a different mask for each NIC family.
3130          */
3131         if (hw->mac.type == ixgbe_mac_X550 ||
3132             hw->mac.type == ixgbe_mac_X550EM_x ||
3133             hw->mac.type == ixgbe_mac_X550EM_a ||
3134             hw->mac.type == ixgbe_mac_X550_vf ||
3135             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3136             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3137                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3138         else
3139                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3140
3141         /*
3142          * 82599 errata, UDP frames with a 0 checksum can be marked as checksum
3143          * errors.
3144          */
3145         if (hw->mac.type == ixgbe_mac_82599EB)
3146                 rxq->rx_udp_csum_zero_err = 1;
3147
3148         /*
3149          * Allocate RX ring hardware descriptors. A memzone large enough to
3150          * handle the maximum ring size is allocated in order to allow for
3151          * resizing in later calls to the queue setup function.
3152          */
3153         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3154                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3155         if (rz == NULL) {
3156                 ixgbe_rx_queue_release(rxq);
3157                 return -ENOMEM;
3158         }
3159
3160         /*
3161          * Zero init all the descriptors in the ring.
3162          */
3163         memset(rz->addr, 0, RX_RING_SZ);
3164
3165         /*
3166          * Set up VFRDT/VFRDH for Virtual Functions, RDT/RDH otherwise
3167          */
3168         if (hw->mac.type == ixgbe_mac_82599_vf ||
3169             hw->mac.type == ixgbe_mac_X540_vf ||
3170             hw->mac.type == ixgbe_mac_X550_vf ||
3171             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3172             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3173                 rxq->rdt_reg_addr =
3174                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3175                 rxq->rdh_reg_addr =
3176                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3177         } else {
3178                 rxq->rdt_reg_addr =
3179                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3180                 rxq->rdh_reg_addr =
3181                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3182         }
3183
3184         rxq->rx_ring_phys_addr = rz->iova;
3185         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3186
3187         /*
3188          * Certain constraints must be met in order to use the bulk buffer
3189          * allocation Rx burst function. If any of the Rx queues doesn't
3190          * meet them, the feature should be disabled for the whole port.
3191          */
3192         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3193                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3194                                     "preconditions - canceling the feature for "
3195                                     "the whole port[%d]",
3196                              rxq->queue_id, rxq->port_id);
3197                 adapter->rx_bulk_alloc_allowed = false;
3198         }
3199
3200         /*
3201          * Allocate software ring. Allow for space at the end of the
3202          * S/W ring to make sure the look-ahead logic in the bulk alloc Rx
3203          * burst function does not access an invalid memory region.
3204          */
3205         len = nb_desc;
3206         if (adapter->rx_bulk_alloc_allowed)
3207                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3208
3209         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3210                                           sizeof(struct ixgbe_rx_entry) * len,
3211                                           RTE_CACHE_LINE_SIZE, socket_id);
3212         if (!rxq->sw_ring) {
3213                 ixgbe_rx_queue_release(rxq);
3214                 return -ENOMEM;
3215         }
3216
3217         /*
3218          * Always allocate even if it's not going to be needed in order to
3219          * simplify the code.
3220          *
3221          * This ring is used in the LRO and Scattered Rx cases, and Scattered
3222          * Rx may be requested in ixgbe_dev_rx_init(), which is called later
3223          * from the dev_start() flow.
3224          */
3225         rxq->sw_sc_ring =
3226                 rte_zmalloc_socket("rxq->sw_sc_ring",
3227                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3228                                    RTE_CACHE_LINE_SIZE, socket_id);
3229         if (!rxq->sw_sc_ring) {
3230                 ixgbe_rx_queue_release(rxq);
3231                 return -ENOMEM;
3232         }
3233
3234         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3235                             "dma_addr=0x%"PRIx64,
3236                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3237                      rxq->rx_ring_phys_addr);
3238
3239         if (!rte_is_power_of_2(nb_desc)) {
3240                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3241                                     "preconditions - canceling the feature for "
3242                                     "the whole port[%d]",
3243                              rxq->queue_id, rxq->port_id);
3244                 adapter->rx_vec_allowed = false;
3245         } else
3246                 ixgbe_rxq_vec_setup(rxq);
3247
3248         dev->data->rx_queues[queue_idx] = rxq;
3249
3250         ixgbe_reset_rx_queue(adapter, rxq);
3251
3252         return 0;
3253 }
3254
3255 uint32_t
3256 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3257 {
3258 #define IXGBE_RXQ_SCAN_INTERVAL 4
3259         volatile union ixgbe_adv_rx_desc *rxdp;
3260         struct ixgbe_rx_queue *rxq;
3261         uint32_t desc = 0;
3262
3263         rxq = dev->data->rx_queues[rx_queue_id];
3264         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3265
3266         while ((desc < rxq->nb_rx_desc) &&
3267                 (rxdp->wb.upper.status_error &
3268                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3269                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3270                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3271                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3272                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3273                                 desc - rxq->nb_rx_desc]);
3274         }
3275
3276         return desc;
3277 }
3278
3279 int
3280 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3281 {
3282         volatile union ixgbe_adv_rx_desc *rxdp;
3283         struct ixgbe_rx_queue *rxq = rx_queue;
3284         uint32_t desc;
3285
3286         if (unlikely(offset >= rxq->nb_rx_desc))
3287                 return 0;
3288         desc = rxq->rx_tail + offset;
3289         if (desc >= rxq->nb_rx_desc)
3290                 desc -= rxq->nb_rx_desc;
3291
3292         rxdp = &rxq->rx_ring[desc];
3293         return !!(rxdp->wb.upper.status_error &
3294                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3295 }
3296
3297 int
3298 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3299 {
3300         struct ixgbe_rx_queue *rxq = rx_queue;
3301         volatile uint32_t *status;
3302         uint32_t nb_hold, desc;
3303
3304         if (unlikely(offset >= rxq->nb_rx_desc))
3305                 return -EINVAL;
3306
3307 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3308         if (rxq->rx_using_sse)
3309                 nb_hold = rxq->rxrearm_nb;
3310         else
3311 #endif
3312                 nb_hold = rxq->nb_rx_hold;
3313         if (offset >= rxq->nb_rx_desc - nb_hold)
3314                 return RTE_ETH_RX_DESC_UNAVAIL;
3315
3316         desc = rxq->rx_tail + offset;
3317         if (desc >= rxq->nb_rx_desc)
3318                 desc -= rxq->nb_rx_desc;
3319
3320         status = &rxq->rx_ring[desc].wb.upper.status_error;
3321         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3322                 return RTE_ETH_RX_DESC_DONE;
3323
3324         return RTE_ETH_RX_DESC_AVAIL;
3325 }
3326
3327 int
3328 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3329 {
3330         struct ixgbe_tx_queue *txq = tx_queue;
3331         volatile uint32_t *status;
3332         uint32_t desc;
3333
3334         if (unlikely(offset >= txq->nb_tx_desc))
3335                 return -EINVAL;
3336
3337         desc = txq->tx_tail + offset;
3338         /* go to next desc that has the RS bit */
3339         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3340                 txq->tx_rs_thresh;
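        /*
         * Editor's note (illustrative): with tx_rs_thresh = 32, tx_tail = 10
         * and offset = 5, desc = 15 is rounded up to 32, the next multiple of
         * tx_rs_thresh, which is where a DD write-back is expected.
         */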
3341         if (desc >= txq->nb_tx_desc) {
3342                 desc -= txq->nb_tx_desc;
3343                 if (desc >= txq->nb_tx_desc)
3344                         desc -= txq->nb_tx_desc;
3345         }
3346
3347         status = &txq->tx_ring[desc].wb.status;
3348         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3349                 return RTE_ETH_TX_DESC_DONE;
3350
3351         return RTE_ETH_TX_DESC_FULL;
3352 }
3353
3354 /*
3355  * Set up link loopback for X540/X550 mode Tx->Rx.
3356  */
3357 static inline void __rte_cold
3358 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3359 {
3360         uint32_t macc;
3361         PMD_INIT_FUNC_TRACE();
3362
3363         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3364
3365         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3366                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3367         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3368
3369         if (enable) {
3370                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3371                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3372                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3373                 macc |= IXGBE_MACC_FLU;
3374         } else {
3375                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3376                 macc &= ~IXGBE_MACC_FLU;
3377         }
3378
3379         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3380                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3381
3382         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3383 }
3384
3385 void __rte_cold
3386 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3387 {
3388         unsigned i;
3389         struct ixgbe_adapter *adapter = dev->data->dev_private;
3390         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3391
3392         PMD_INIT_FUNC_TRACE();
3393
3394         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3395                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3396
3397                 if (txq != NULL) {
3398                         txq->ops->release_mbufs(txq);
3399                         txq->ops->reset(txq);
3400                 }
3401         }
3402
3403         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3404                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3405
3406                 if (rxq != NULL) {
3407                         ixgbe_rx_queue_release_mbufs(rxq);
3408                         ixgbe_reset_rx_queue(adapter, rxq);
3409                 }
3410         }
3411         /* If loopback mode was enabled, reconfigure the link accordingly */
3412         if (dev->data->dev_conf.lpbk_mode != 0) {
3413                 if (hw->mac.type == ixgbe_mac_X540 ||
3414                      hw->mac.type == ixgbe_mac_X550 ||
3415                      hw->mac.type == ixgbe_mac_X550EM_x ||
3416                      hw->mac.type == ixgbe_mac_X550EM_a)
3417                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3418         }
3419 }
3420
3421 void
3422 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3423 {
3424         unsigned i;
3425
3426         PMD_INIT_FUNC_TRACE();
3427
3428         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3429                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3430                 dev->data->rx_queues[i] = NULL;
3431                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3432         }
3433         dev->data->nb_rx_queues = 0;
3434
3435         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3436                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3437                 dev->data->tx_queues[i] = NULL;
3438                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3439         }
3440         dev->data->nb_tx_queues = 0;
3441 }
3442
3443 /*********************************************************************
3444  *
3445  *  Device RX/TX init functions
3446  *
3447  **********************************************************************/
3448
3449 /**
3450  * Receive Side Scaling (RSS)
3451  * See section 7.1.2.8 in the following document:
3452  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3453  *
3454  * Principles:
3455  * The source and destination IP addresses of the IP header and the source
3456  * and destination ports of TCP/UDP headers, if any, of received packets are
3457  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3458  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3459  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3460  * RSS output index which is used as the RX queue index in which to store
3461  * the received packets.
3462  * The following output is supplied in the RX write-back descriptor:
3463  *     - 32-bit result of the Microsoft RSS hash function,
3464  *     - 4-bit RSS type field.
3465  */
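/*
 * Editor's sketch of the resulting dispatch, following the description
 * above (rss_hash and reta are illustrative names, not driver symbols):
 *
 *     hash  = rss_hash(key, ip_addrs, l4_ports);    -- 32-bit hash result
 *     queue = reta[hash & 0x7F];                    -- 7 LSBs index the RETA
 */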
3466
3467 /*
3468  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3469  * Used as the default key.
3470  */
3471 static uint8_t rss_intel_key[40] = {
3472         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3473         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3474         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3475         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3476         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3477 };
3478
3479 static void
3480 ixgbe_rss_disable(struct rte_eth_dev *dev)
3481 {
3482         struct ixgbe_hw *hw;
3483         uint32_t mrqc;
3484         uint32_t mrqc_reg;
3485
3486         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3487         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3488         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3489         mrqc &= ~IXGBE_MRQC_RSSEN;
3490         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3491 }
3492
3493 static void
3494 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3495 {
3496         uint8_t  *hash_key;
3497         uint32_t mrqc;
3498         uint32_t rss_key;
3499         uint64_t rss_hf;
3500         uint16_t i;
3501         uint32_t mrqc_reg;
3502         uint32_t rssrk_reg;
3503
3504         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3505         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3506
3507         hash_key = rss_conf->rss_key;
3508         if (hash_key != NULL) {
3509                 /* Fill in RSS hash key */
3510                 for (i = 0; i < 10; i++) {
3511                         rss_key  = hash_key[(i * 4)];
3512                         rss_key |= hash_key[(i * 4) + 1] << 8;
3513                         rss_key |= hash_key[(i * 4) + 2] << 16;
3514                         rss_key |= hash_key[(i * 4) + 3] << 24;
3515                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3516                 }
3517         }
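        /*
         * Editor's note (illustrative): key bytes are packed little-endian
         * into each 32-bit RSSRK register, e.g. the default key bytes
         * 0x6D, 0x5A, 0x56, 0xDA produce RSSRK[0] = 0xDA565A6D.
         */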
3518
3519         /* Set configured hashing protocols in MRQC register */
3520         rss_hf = rss_conf->rss_hf;
3521         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3522         if (rss_hf & ETH_RSS_IPV4)
3523                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3524         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3525                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3526         if (rss_hf & ETH_RSS_IPV6)
3527                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3528         if (rss_hf & ETH_RSS_IPV6_EX)
3529                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3530         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3531                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3532         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3533                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3534         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3535                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3536         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3537                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3538         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3539                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3540         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3541 }
3542
3543 int
3544 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3545                           struct rte_eth_rss_conf *rss_conf)
3546 {
3547         struct ixgbe_hw *hw;
3548         uint32_t mrqc;
3549         uint64_t rss_hf;
3550         uint32_t mrqc_reg;
3551
3552         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3553
3554         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3555                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3556                         "NIC.");
3557                 return -ENOTSUP;
3558         }
3559         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3560
3561         /*
3562          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3563          *     "RSS enabling cannot be done dynamically while it must be
3564          *      preceded by a software reset"
3565          * Before changing anything, first check that the update RSS operation
3566          * does not attempt to disable RSS, if RSS was enabled at
3567          * initialization time, or does not attempt to enable RSS, if RSS was
3568          * disabled at initialization time.
3569          */
3570         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3571         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3572         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3573                 if (rss_hf != 0) /* Enable RSS */
3574                         return -(EINVAL);
3575                 return 0; /* Nothing to do */
3576         }
3577         /* RSS enabled */
3578         if (rss_hf == 0) /* Disable RSS */
3579                 return -(EINVAL);
3580         ixgbe_hw_rss_hash_set(hw, rss_conf);
3581         return 0;
3582 }
3583
3584 int
3585 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3586                             struct rte_eth_rss_conf *rss_conf)
3587 {
3588         struct ixgbe_hw *hw;
3589         uint8_t *hash_key;
3590         uint32_t mrqc;
3591         uint32_t rss_key;
3592         uint64_t rss_hf;
3593         uint16_t i;
3594         uint32_t mrqc_reg;
3595         uint32_t rssrk_reg;
3596
3597         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3598         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3599         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3600         hash_key = rss_conf->rss_key;
3601         if (hash_key != NULL) {
3602                 /* Return RSS hash key */
3603                 for (i = 0; i < 10; i++) {
3604                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3605                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3606                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3607                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3608                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3609                 }
3610         }
3611
3612         /* Get RSS functions configured in MRQC register */
3613         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3614         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3615                 rss_conf->rss_hf = 0;
3616                 return 0;
3617         }
3618         rss_hf = 0;
3619         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3620                 rss_hf |= ETH_RSS_IPV4;
3621         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3622                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3623         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3624                 rss_hf |= ETH_RSS_IPV6;
3625         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3626                 rss_hf |= ETH_RSS_IPV6_EX;
3627         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3628                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3629         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3630                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3631         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3632                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3633         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3634                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3635         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3636                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3637         rss_conf->rss_hf = rss_hf;
3638         return 0;
3639 }
3640
3641 static void
3642 ixgbe_rss_configure(struct rte_eth_dev *dev)
3643 {
3644         struct rte_eth_rss_conf rss_conf;
3645         struct ixgbe_adapter *adapter;
3646         struct ixgbe_hw *hw;
3647         uint32_t reta;
3648         uint16_t i;
3649         uint16_t j;
3650         uint16_t sp_reta_size;
3651         uint32_t reta_reg;
3652
3653         PMD_INIT_FUNC_TRACE();
3654         adapter = dev->data->dev_private;
3655         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3656
3657         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3658
3659         /*
3660          * Fill in redirection table
3661          * The byte-swap is needed because NIC registers are in
3662          * little-endian order.
3663          */
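        /*
         * Editor's note (illustrative): with 4 Rx queues the 128 RETA entries
         * cycle 0,1,2,3,0,1,... and every group of four entries is packed
         * into one 32-bit register before being byte-swapped for the NIC.
         */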
3664         if (adapter->rss_reta_updated == 0) {
3665                 reta = 0;
3666                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3667                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3668
3669                         if (j == dev->data->nb_rx_queues)
3670                                 j = 0;
3671                         reta = (reta << 8) | j;
3672                         if ((i & 3) == 3)
3673                                 IXGBE_WRITE_REG(hw, reta_reg,
3674                                                 rte_bswap32(reta));
3675                 }
3676         }
3677
3678         /*
3679          * Configure the RSS key and the RSS protocols used to compute
3680          * the RSS hash of input packets.
3681          */
3682         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3683         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3684                 ixgbe_rss_disable(dev);
3685                 return;
3686         }
3687         if (rss_conf.rss_key == NULL)
3688                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3689         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3690 }
3691
3692 #define NUM_VFTA_REGISTERS 128
3693 #define NIC_RX_BUFFER_SIZE 0x200
3694 #define X550_RX_BUFFER_SIZE 0x180
3695
3696 static void
3697 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3698 {
3699         struct rte_eth_vmdq_dcb_conf *cfg;
3700         struct ixgbe_hw *hw;
3701         enum rte_eth_nb_pools num_pools;
3702         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3703         uint16_t pbsize;
3704         uint8_t nb_tcs; /* number of traffic classes */
3705         int i;
3706
3707         PMD_INIT_FUNC_TRACE();
3708         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3709         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3710         num_pools = cfg->nb_queue_pools;
3711         /* Check we have a valid number of pools */
3712         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3713                 ixgbe_rss_disable(dev);
3714                 return;
3715         }
3716         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3717         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3718
3719         /*
3720          * RXPBSIZE
3721          * split rx buffer up into sections, each for 1 traffic class
3722          */
3723         switch (hw->mac.type) {
3724         case ixgbe_mac_X550:
3725         case ixgbe_mac_X550EM_x:
3726         case ixgbe_mac_X550EM_a:
3727                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3728                 break;
3729         default:
3730                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3731                 break;
3732         }
3733         for (i = 0; i < nb_tcs; i++) {
3734                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3735
3736                 /* clear the 10-bit size field and set the new value */
3737                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3738                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT);
3739                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3740         }
3741         /* zero alloc all unused TCs */
3742         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3743                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3744
3745                 /* clear the 10-bit size field: zero-size buffer for unused TCs */
3746                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3747                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3748         }
3749
3750         /* MRQC: enable vmdq and dcb */
3751         mrqc = (num_pools == ETH_16_POOLS) ?
3752                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3753         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3754
3755         /* PFVTCTL: turn on virtualisation and set the default pool */
3756         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3757         if (cfg->enable_default_pool) {
3758                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3759         } else {
3760                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3761         }
3762
3763         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3764
3765         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3766         queue_mapping = 0;
3767         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3768                 /*
3769                  * mapping is done with 3 bits per priority,
3770                  * so shift by i*3 each time
3771                  */
3772                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
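        /*
         * Editor's note (illustrative): mapping priorities 0..7 to TCs
         * 0,1,2,3,0,1,2,3 yields queue_mapping = 0x688688 (priority 0
         * occupies the least significant three bits).
         */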
3773
3774         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3775
3776         /* RTRPCS: DCB related */
3777         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3778
3779         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3780         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3781         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3782         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3783
3784         /* VFTA - enable all vlan filters */
3785         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3786                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3787         }
3788
3789         /* VFRE: pool enabling for receive - 16 or 32 */
3790         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3791                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3792
3793         /*
3794          * MPSAR - allow pools to read specific mac addresses
3795          * In this case, all pools should be able to read from mac addr 0
3796          */
3797         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3798         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3799
3800         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3801         for (i = 0; i < cfg->nb_pool_maps; i++) {
3802                 /* set vlan id in VF register and set the valid bit */
3803                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3804                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3805                 /*
3806                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3807                  * pools, we only need to use the first half of the register
3808                  * i.e. bits 0-31
3809                  */
3810                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3811         }
3812 }
3813
3814 /**
3815  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3816  * @dev: pointer to eth_dev structure
3817  * @dcb_config: pointer to ixgbe_dcb_config structure
3818  */
3819 static void
3820 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3821                        struct ixgbe_dcb_config *dcb_config)
3822 {
3823         uint32_t reg;
3824         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3825
3826         PMD_INIT_FUNC_TRACE();
3827         if (hw->mac.type != ixgbe_mac_82598EB) {
3828                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3829                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3830                 reg |= IXGBE_RTTDCS_ARBDIS;
3831                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3832
3833                 /* Enable DCB for Tx with 8 TCs */
3834                 if (dcb_config->num_tcs.pg_tcs == 8) {
3835                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3836                 } else {
3837                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3838                 }
3839                 if (dcb_config->vt_mode)
3840                         reg |= IXGBE_MTQC_VT_ENA;
3841                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3842
3843                 /* Enable the Tx desc arbiter */
3844                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3845                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3846                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3847
3848                 /* Enable Security TX Buffer IFG for DCB */
3849                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3850                 reg |= IXGBE_SECTX_DCB;
3851                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3852         }
3853 }
3854
3855 /**
3856  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3857  * @dev: pointer to rte_eth_dev structure
3858  * @dcb_config: pointer to ixgbe_dcb_config structure
3859  */
3860 static void
3861 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3862                         struct ixgbe_dcb_config *dcb_config)
3863 {
3864         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3865                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3866         struct ixgbe_hw *hw =
3867                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3868
3869         PMD_INIT_FUNC_TRACE();
3870         if (hw->mac.type != ixgbe_mac_82598EB)
3871                 /*PF VF Transmit Enable*/
3872                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3873                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3874
3875         /*Configure general DCB TX parameters*/
3876         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3877 }
3878
3879 static void
3880 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3881                         struct ixgbe_dcb_config *dcb_config)
3882 {
3883         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3884                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3885         struct ixgbe_dcb_tc_config *tc;
3886         uint8_t i, j;
3887
3888         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3889         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3890                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3891                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3892         } else {
3893                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3894                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3895         }
3896
3897         /* Initialize User Priority to Traffic Class mapping */
3898         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3899                 tc = &dcb_config->tc_config[j];
3900                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3901         }
3902
3903         /* User Priority to Traffic Class mapping */
3904         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3905                 j = vmdq_rx_conf->dcb_tc[i];
3906                 tc = &dcb_config->tc_config[j];
3907                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3908                                                 (uint8_t)(1 << i);
3909         }
3910 }
3911
3912 static void
3913 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3914                         struct ixgbe_dcb_config *dcb_config)
3915 {
3916         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3917                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3918         struct ixgbe_dcb_tc_config *tc;
3919         uint8_t i, j;
3920
3921         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3922         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3923                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3924                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3925         } else {
3926                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3927                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3928         }
3929
3930         /* Initialize User Priority to Traffic Class mapping */
3931         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3932                 tc = &dcb_config->tc_config[j];
3933                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3934         }
3935
3936         /* User Priority to Traffic Class mapping */
3937         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3938                 j = vmdq_tx_conf->dcb_tc[i];
3939                 tc = &dcb_config->tc_config[j];
3940                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3941                                                 (uint8_t)(1 << i);
3942         }
3943 }
3944
3945 static void
3946 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3947                 struct ixgbe_dcb_config *dcb_config)
3948 {
3949         struct rte_eth_dcb_rx_conf *rx_conf =
3950                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3951         struct ixgbe_dcb_tc_config *tc;
3952         uint8_t i, j;
3953
3954         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3955         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3956
3957         /* Initialize User Priority to Traffic Class mapping */
3958         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3959                 tc = &dcb_config->tc_config[j];
3960                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3961         }
3962
3963         /* User Priority to Traffic Class mapping */
3964         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3965                 j = rx_conf->dcb_tc[i];
3966                 tc = &dcb_config->tc_config[j];
3967                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3968                                                 (uint8_t)(1 << i);
3969         }
3970 }
3971
3972 static void
3973 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3974                 struct ixgbe_dcb_config *dcb_config)
3975 {
3976         struct rte_eth_dcb_tx_conf *tx_conf =
3977                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3978         struct ixgbe_dcb_tc_config *tc;
3979         uint8_t i, j;
3980
3981         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3982         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3983
3984         /* Initialize User Priority to Traffic Class mapping */
3985         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3986                 tc = &dcb_config->tc_config[j];
3987                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3988         }
3989
3990         /* User Priority to Traffic Class mapping */
3991         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3992                 j = tx_conf->dcb_tc[i];
3993                 tc = &dcb_config->tc_config[j];
3994                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3995                                                 (uint8_t)(1 << i);
3996         }
3997 }
3998
3999 /**
4000  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
4001  * @dev: pointer to eth_dev structure
4002  * @dcb_config: pointer to ixgbe_dcb_config structure
4003  */
4004 static void
4005 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
4006                        struct ixgbe_dcb_config *dcb_config)
4007 {
4008         uint32_t reg;
4009         uint32_t vlanctrl;
4010         uint8_t i;
4011         uint32_t q;
4012         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4013
4014         PMD_INIT_FUNC_TRACE();
4015         /*
4016          * Disable the arbiter before changing parameters
4017          * (always enable recycle mode; WSP)
4018          */
4019         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4020         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4021
4022         if (hw->mac.type != ixgbe_mac_82598EB) {
4023                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
4024                 if (dcb_config->num_tcs.pg_tcs == 4) {
4025                         if (dcb_config->vt_mode)
4026                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4027                                         IXGBE_MRQC_VMDQRT4TCEN;
4028                         else {
4029                                 /* no matter the mode is DCB or DCB_RSS, just
4030                                 /* whether the mode is DCB or DCB_RSS, just
4031                                  * set MRQE to RTRSSxTCEN; RSS itself is
4032                                  * controlled by the RSS_FIELD bits
4033                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4034                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4035                                         IXGBE_MRQC_RTRSS4TCEN;
4036                         }
4037                 }
4038                 if (dcb_config->num_tcs.pg_tcs == 8) {
4039                         if (dcb_config->vt_mode)
4040                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4041                                         IXGBE_MRQC_VMDQRT8TCEN;
4042                         else {
4043                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4044                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4045                                         IXGBE_MRQC_RTRSS8TCEN;
4046                         }
4047                 }
4048
4049                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4050
4051                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4052                         /* Disable drop for all queues in VMDQ mode */
4053                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4054                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4055                                                 (IXGBE_QDE_WRITE |
4056                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4057                 } else {
4058                         /* Enable drop for all queues in SRIOV mode */
4059                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4060                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4061                                                 (IXGBE_QDE_WRITE |
4062                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4063                                                  IXGBE_QDE_ENABLE));
4064                 }
4065         }
4066
4067         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4068         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4069         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4070         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4071
4072         /* VFTA - enable all vlan filters */
4073         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4074                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4075         }
4076
4077         /*
4078          * Configure Rx packet plane (recycle mode; WSP) and
4079          * enable arbiter
4080          */
4081         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4082         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4083 }
4084
4085 static void
4086 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4087                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4088 {
4089         switch (hw->mac.type) {
4090         case ixgbe_mac_82598EB:
4091                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4092                 break;
4093         case ixgbe_mac_82599EB:
4094         case ixgbe_mac_X540:
4095         case ixgbe_mac_X550:
4096         case ixgbe_mac_X550EM_x:
4097         case ixgbe_mac_X550EM_a:
4098                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4099                                                   tsa, map);
4100                 break;
4101         default:
4102                 break;
4103         }
4104 }
4105
4106 static void
4107 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4108                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4109 {
4110         switch (hw->mac.type) {
4111         case ixgbe_mac_82598EB:
4112                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4113                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4114                 break;
4115         case ixgbe_mac_82599EB:
4116         case ixgbe_mac_X540:
4117         case ixgbe_mac_X550:
4118         case ixgbe_mac_X550EM_x:
4119         case ixgbe_mac_X550EM_a:
4120                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4121                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4122                 break;
4123         default:
4124                 break;
4125         }
4126 }
4127
4128 #define DCB_RX_CONFIG  1
4129 #define DCB_TX_CONFIG  1
4130 #define DCB_TX_PB      1024
4131 /**
4132  * ixgbe_dcb_hw_configure - Enable DCB and configure
4133  * general DCB parameters in both VT and non-VT mode
4134  * @dev: pointer to rte_eth_dev structure
4135  * @dcb_config: pointer to ixgbe_dcb_config structure
4136  */
4137 static int
4138 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4139                         struct ixgbe_dcb_config *dcb_config)
4140 {
4141         int     ret = 0;
4142         uint8_t i, pfc_en, nb_tcs;
4143         uint16_t pbsize, rx_buffer_size;
4144         uint8_t config_dcb_rx = 0;
4145         uint8_t config_dcb_tx = 0;
4146         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4147         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4148         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4149         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4150         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4151         struct ixgbe_dcb_tc_config *tc;
4152         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4153                 RTE_ETHER_CRC_LEN;
4154         struct ixgbe_hw *hw =
4155                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4156         struct ixgbe_bw_conf *bw_conf =
4157                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4158
4159         switch (dev->data->dev_conf.rxmode.mq_mode) {
4160         case ETH_MQ_RX_VMDQ_DCB:
4161                 dcb_config->vt_mode = true;
4162                 if (hw->mac.type != ixgbe_mac_82598EB) {
4163                         config_dcb_rx = DCB_RX_CONFIG;
4164                         /*
4165                          * get DCB and VT RX configuration parameters
4166                          * from rte_eth_conf
4167                          */
4168                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4169                         /*Configure general VMDQ and DCB RX parameters*/
4170                         ixgbe_vmdq_dcb_configure(dev);
4171                 }
4172                 break;
4173         case ETH_MQ_RX_DCB:
4174         case ETH_MQ_RX_DCB_RSS:
4175                 dcb_config->vt_mode = false;
4176                 config_dcb_rx = DCB_RX_CONFIG;
4177                 /* Get DCB RX configuration parameters from rte_eth_conf */
4178                 ixgbe_dcb_rx_config(dev, dcb_config);
4179                 /*Configure general DCB RX parameters*/
4180                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4181                 break;
4182         default:
4183                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4184                 break;
4185         }
4186         switch (dev->data->dev_conf.txmode.mq_mode) {
4187         case ETH_MQ_TX_VMDQ_DCB:
4188                 dcb_config->vt_mode = true;
4189                 config_dcb_tx = DCB_TX_CONFIG;
4190                 /* get DCB and VT TX configuration parameters
4191                  * from rte_eth_conf
4192                  */
4193                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4194                 /*Configure general VMDQ and DCB TX parameters*/
4195                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4196                 break;
4197
4198         case ETH_MQ_TX_DCB:
4199                 dcb_config->vt_mode = false;
4200                 config_dcb_tx = DCB_TX_CONFIG;
4201                 /*get DCB TX configuration parameters from rte_eth_conf*/
4202                 ixgbe_dcb_tx_config(dev, dcb_config);
4203                 /*Configure general DCB TX parameters*/
4204                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4205                 break;
4206         default:
4207                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4208                 break;
4209         }
4210
4211         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4212         /* Unpack map */
4213         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
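        /*
         * map[] is indexed by user priority and now holds the traffic class
         * that each priority was assigned to in the CEE configuration.
         */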
4214         if (nb_tcs == ETH_4_TCS) {
4215                 /* Avoid un-configured priority mapping to TC0 */
4216                 uint8_t j = 4;
4217                 uint8_t mask = 0xFF;
4218
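                /*
                 * Clear from the mask every TC already used by the first four
                 * user priorities, then hand priorities 4-7 to the remaining
                 * unused TCs so that none of them is left implicitly mapped
                 * to TC0.
                 */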
4219                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4220                         mask = (uint8_t)(mask & (~(1 << map[i])));
4221                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4222                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4223                                 map[j++] = i;
4224                         mask >>= 1;
4225                 }
4226                 /* Re-configure 4 TCs BW */
4227                 for (i = 0; i < nb_tcs; i++) {
4228                         tc = &dcb_config->tc_config[i];
4229                         if (bw_conf->tc_num != nb_tcs)
4230                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4231                                         (uint8_t)(100 / nb_tcs);
4232                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4233                                                 (uint8_t)(100 / nb_tcs);
4234                 }
4235                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4236                         tc = &dcb_config->tc_config[i];
4237                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4238                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4239                 }
4240         } else {
4241                 /* Re-configure 8 TCs BW */
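                /*
                 * 100 does not divide evenly by 8, so odd-numbered TCs get one
                 * extra percent: four TCs at 12% plus four at 13% add up to
                 * 100%.
                 */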
4242                 for (i = 0; i < nb_tcs; i++) {
4243                         tc = &dcb_config->tc_config[i];
4244                         if (bw_conf->tc_num != nb_tcs)
4245                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4246                                         (uint8_t)(100 / nb_tcs + (i & 1));
4247                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4248                                 (uint8_t)(100 / nb_tcs + (i & 1));
4249                 }
4250         }
4251
4252         switch (hw->mac.type) {
4253         case ixgbe_mac_X550:
4254         case ixgbe_mac_X550EM_x:
4255         case ixgbe_mac_X550EM_a:
4256                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4257                 break;
4258         default:
4259                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4260                 break;
4261         }
4262
4263         if (config_dcb_rx) {
4264                 /* Set RX buffer size */
4265                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4266                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4267
4268                 for (i = 0; i < nb_tcs; i++) {
4269                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4270                 }
4271                 /* zero alloc all unused TCs */
4272                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4273                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4274                 }
4275         }
4276         if (config_dcb_tx) {
4277                 /* Only an equally distributed Tx packet buffer
4278                  * strategy is supported.
4279                  */
4280                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4281                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
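                /*
                 * txpktsize is the per-TC Tx packet buffer size in bytes;
                 * dividing by DCB_TX_PB expresses the threshold in 1 KB units
                 * and IXGBE_TXPKT_SIZE_MAX is subtracted to leave headroom
                 * below the buffer size.
                 */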
4282
4283                 for (i = 0; i < nb_tcs; i++) {
4284                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4285                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4286                 }
4287                 /* Clear unused TCs, if any, to zero buffer size*/
4288                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4289                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4290                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4291                 }
4292         }
4293
4294         /* Calculate traffic class credits */
4295         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4296                                 IXGBE_DCB_TX_CONFIG);
4297         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4298                                 IXGBE_DCB_RX_CONFIG);
4299
4300         if (config_dcb_rx) {
4301                 /* Unpack CEE standard containers */
4302                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4303                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4304                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4305                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4306                 /* Configure PG(ETS) RX */
4307                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4308         }
4309
4310         if (config_dcb_tx) {
4311                 /* Unpack CEE standard containers */
4312                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4313                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4314                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4315                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4316                 /* Configure PG(ETS) TX */
4317                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4318         }
4319
4320         /*Configure queue statistics registers*/
4321         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4322
4323         /* Check if the PFC is supported */
4324         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4325                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4326                 for (i = 0; i < nb_tcs; i++) {
4327                         /*
4328                          * high_water is 3/4 and low_water is 1/4 of the per-TC
4329                          * buffer size (e.g. with 8 TCs the defaults are 48 and 16).
4330                          */
4331                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4332                         hw->fc.low_water[i] = pbsize / 4;
4333                         /* Enable pfc for this TC */
4334                         tc = &dcb_config->tc_config[i];
4335                         tc->pfc = ixgbe_dcb_pfc_enabled;
4336                 }
4337                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4338                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4339                         pfc_en &= 0x0F;
4340                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4341         }
4342
4343         return ret;
4344 }
4345
4346 /**
4347  * ixgbe_configure_dcb - Configure DCB hardware
4348  * @dev: pointer to rte_eth_dev
4349  */
4350 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4351 {
4352         struct ixgbe_dcb_config *dcb_cfg =
4353                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4354         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4355
4356         PMD_INIT_FUNC_TRACE();
4357
4358         /* check that the mq_mode is supported for DCB */
4359         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4360             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4361             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4362                 return;
4363
4364         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4365                 return;
4366
4367         /** Configure DCB hardware **/
4368         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4369 }
4370
4371 /*
4372  * VMDq is only supported on 10 GbE NICs.
4373  */
4374 static void
4375 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4376 {
4377         struct rte_eth_vmdq_rx_conf *cfg;
4378         struct ixgbe_hw *hw;
4379         enum rte_eth_nb_pools num_pools;
4380         uint32_t mrqc, vt_ctl, vlanctrl;
4381         uint32_t vmolr = 0;
4382         int i;
4383
4384         PMD_INIT_FUNC_TRACE();
4385         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4386         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4387         num_pools = cfg->nb_queue_pools;
4388
4389         ixgbe_rss_disable(dev);
4390
4391         /* MRQC: enable vmdq */
4392         mrqc = IXGBE_MRQC_VMDQEN;
4393         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4394
4395         /* PFVTCTL: turn on virtualisation and set the default pool */
4396         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4397         if (cfg->enable_default_pool)
4398                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4399         else
4400                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4401
4402         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4403
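        /*
         * Translate the requested VMDq Rx mode into VMOLR bits and apply it
         * to every configured pool.
         */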
4404         for (i = 0; i < (int)num_pools; i++) {
4405                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4406                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4407         }
4408
4409         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4410         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4411         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4412         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4413
4414         /* VFTA - enable all vlan filters */
4415         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4416                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4417
4418         /* VFRE: pool enabling for receive - 64 */
4419         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4420         if (num_pools == ETH_64_POOLS)
4421                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4422
4423         /*
4424          * MPSAR - allow pools to read specific mac addresses
4425          * In this case, all pools should be able to read from mac addr 0
4426          */
4427         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4428         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4429
4430         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4431         for (i = 0; i < cfg->nb_pool_maps; i++) {
4432                 /* set vlan id in VF register and set the valid bit */
4433                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4434                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4435                 /*
4436                  * Put the allowed pools in the VLVFB registers. The 64-bit
4437                  * pool bitmap spans two 32-bit registers; write the half
4438                  * that actually contains the configured pools.
4439                  */
4440                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4441                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4442                                         (cfg->pool_map[i].pools & UINT32_MAX));
4443                 else
4444                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4445                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4446
4447         }
4448
4449         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4450         if (cfg->enable_loop_back) {
4451                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4452                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4453                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4454         }
4455
4456         IXGBE_WRITE_FLUSH(hw);
4457 }
4458
4459 /*
4460  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4461  * @hw: pointer to hardware structure
4462  */
4463 static void
4464 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4465 {
4466         uint32_t reg;
4467         uint32_t q;
4468
4469         PMD_INIT_FUNC_TRACE();
4470         /*PF VF Transmit Enable*/
4471         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4472         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4473
4474         /* Disable the Tx desc arbiter so that MTQC can be changed */
4475         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4476         reg |= IXGBE_RTTDCS_ARBDIS;
4477         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4478
4479         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4480         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4481
4482         /* Disable drop for all queues */
4483         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4484                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4485                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4486
4487         /* Enable the Tx desc arbiter */
4488         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4489         reg &= ~IXGBE_RTTDCS_ARBDIS;
4490         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4491
4492         IXGBE_WRITE_FLUSH(hw);
4493 }
4494
4495 static int __rte_cold
4496 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4497 {
4498         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4499         uint64_t dma_addr;
4500         unsigned int i;
4501
4502         /* Initialize software ring entries */
4503         for (i = 0; i < rxq->nb_rx_desc; i++) {
4504                 volatile union ixgbe_adv_rx_desc *rxd;
4505                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4506
4507                 if (mbuf == NULL) {
4508                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4509                                      (unsigned) rxq->queue_id);
4510                         return -ENOMEM;
4511                 }
4512
4513                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4514                 mbuf->port = rxq->port_id;
4515
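                /*
                 * Program the mbuf's data buffer IOVA into the advanced Rx
                 * descriptor so the NIC can DMA packets straight into it;
                 * hdr_addr stays zero since header split is not used.
                 */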
4516                 dma_addr =
4517                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4518                 rxd = &rxq->rx_ring[i];
4519                 rxd->read.hdr_addr = 0;
4520                 rxd->read.pkt_addr = dma_addr;
4521                 rxe[i].mbuf = mbuf;
4522         }
4523
4524         return 0;
4525 }
4526
4527 static int
4528 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4529 {
4530         struct ixgbe_hw *hw;
4531         uint32_t mrqc;
4532
4533         ixgbe_rss_configure(dev);
4534
4535         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4536
4537         /* MRQC: enable VF RSS */
4538         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4539         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4540         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4541         case ETH_64_POOLS:
4542                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4543                 break;
4544
4545         case ETH_32_POOLS:
4546                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4547                 break;
4548
4549         default:
4550                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4551                 return -EINVAL;
4552         }
4553
4554         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4555
4556         return 0;
4557 }
4558
4559 static int
4560 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4561 {
4562         struct ixgbe_hw *hw =
4563                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4564
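        /*
         * Fallback multi-queue layout for SR-IOV: 64 pools enable plain
         * VMDq, while 32 and 16 pools select the VMDq layouts that leave
         * room for 4 and 8 traffic classes per pool respectively.
         */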
4565         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4566         case ETH_64_POOLS:
4567                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4568                         IXGBE_MRQC_VMDQEN);
4569                 break;
4570
4571         case ETH_32_POOLS:
4572                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4573                         IXGBE_MRQC_VMDQRT4TCEN);
4574                 break;
4575
4576         case ETH_16_POOLS:
4577                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4578                         IXGBE_MRQC_VMDQRT8TCEN);
4579                 break;
4580         default:
4581                 PMD_INIT_LOG(ERR,
4582                         "invalid pool number in IOV mode");
4583                 break;
4584         }
4585         return 0;
4586 }
4587
4588 static int
4589 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4590 {
4591         struct ixgbe_hw *hw =
4592                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4593
4594         if (hw->mac.type == ixgbe_mac_82598EB)
4595                 return 0;
4596
4597         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4598                 /*
4599                  * SRIOV inactive scheme
4600                  * any DCB/RSS w/o VMDq multi-queue setting
4601                  */
4602                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4603                 case ETH_MQ_RX_RSS:
4604                 case ETH_MQ_RX_DCB_RSS:
4605                 case ETH_MQ_RX_VMDQ_RSS:
4606                         ixgbe_rss_configure(dev);
4607                         break;
4608
4609                 case ETH_MQ_RX_VMDQ_DCB:
4610                         ixgbe_vmdq_dcb_configure(dev);
4611                         break;
4612
4613                 case ETH_MQ_RX_VMDQ_ONLY:
4614                         ixgbe_vmdq_rx_hw_configure(dev);
4615                         break;
4616
4617                 case ETH_MQ_RX_NONE:
4618                 default:
4619                         /* if mq_mode is none, disable RSS mode. */
4620                         ixgbe_rss_disable(dev);
4621                         break;
4622                 }
4623         } else {
4624                 /* SRIOV active scheme
4625                  * Support RSS together with SRIOV.
4626                  */
4627                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4628                 case ETH_MQ_RX_RSS:
4629                 case ETH_MQ_RX_VMDQ_RSS:
4630                         ixgbe_config_vf_rss(dev);
4631                         break;
4632                 case ETH_MQ_RX_VMDQ_DCB:
4633                 case ETH_MQ_RX_DCB:
4634                 /* In SRIOV, the configuration is the same as VMDq case */
4635                         ixgbe_vmdq_dcb_configure(dev);
4636                         break;
4637                 /* DCB/RSS together with SRIOV is not supported */
4638                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4639                 case ETH_MQ_RX_DCB_RSS:
4640                         PMD_INIT_LOG(ERR,
4641                                 "Could not support DCB/RSS with VMDq & SRIOV");
4642                         return -1;
4643                 default:
4644                         ixgbe_config_vf_default(dev);
4645                         break;
4646                 }
4647         }
4648
4649         return 0;
4650 }
4651
4652 static int
4653 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4654 {
4655         struct ixgbe_hw *hw =
4656                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4657         uint32_t mtqc;
4658         uint32_t rttdcs;
4659
4660         if (hw->mac.type == ixgbe_mac_82598EB)
4661                 return 0;
4662
4663         /* disable arbiter before setting MTQC */
4664         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4665         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4666         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4667
4668         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4669                 /*
4670                  * SRIOV inactive scheme
4671                  * any DCB w/o VMDq multi-queue setting
4672                  */
4673                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4674                         ixgbe_vmdq_tx_hw_configure(hw);
4675                 else {
4676                         mtqc = IXGBE_MTQC_64Q_1PB;
4677                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4678                 }
4679         } else {
4680                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4681
4682                 /*
4683                  * SRIOV active scheme
4684                  * FIXME: handle DCB together with VMDq & SRIOV
4685                  */
4686                 case ETH_64_POOLS:
4687                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4688                         break;
4689                 case ETH_32_POOLS:
4690                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4691                         break;
4692                 case ETH_16_POOLS:
4693                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4694                                 IXGBE_MTQC_8TC_8TQ;
4695                         break;
4696                 default:
4697                         mtqc = IXGBE_MTQC_64Q_1PB;
4698                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4699                 }
4700                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4701         }
4702
4703         /* re-enable arbiter */
4704         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4705         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4706
4707         return 0;
4708 }
4709
4710 /**
4711  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4712  *
4713  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4714  * spec rev. 3.0 chapter 8.2.3.8.13.
4715  *
4716  * @pool Memory pool of the Rx queue
4717  */
4718 static inline uint32_t
4719 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4720 {
4721         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4722
4723         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4724         uint16_t maxdesc =
4725                 RTE_IPV4_MAX_PKT_LEN /
4726                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4727
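        /*
         * maxdesc is how many Rx buffers a maximum-size IP packet can span
         * with this mempool's buffer size; the checks below round it down to
         * the nearest MAXDESC encoding the hardware accepts.
         */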
4728         if (maxdesc >= 16)
4729                 return IXGBE_RSCCTL_MAXDESC_16;
4730         else if (maxdesc >= 8)
4731                 return IXGBE_RSCCTL_MAXDESC_8;
4732         else if (maxdesc >= 4)
4733                 return IXGBE_RSCCTL_MAXDESC_4;
4734         else
4735                 return IXGBE_RSCCTL_MAXDESC_1;
4736 }
4737
4738 /**
4739  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4740  * interrupt
4741  *
4742  * (Taken from FreeBSD tree)
4743  * (yes this is all very magic and confusing :)
4744  *
4745  * @dev port handle
4746  * @entry the register array entry
4747  * @vector the MSIX vector for this queue
4748  * @type RX/TX/MISC
4749  */
4750 static void
4751 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4752 {
4753         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4754         u32 ivar, index;
4755
4756         vector |= IXGBE_IVAR_ALLOC_VAL;
4757
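        /*
         * Each 32-bit IVAR register packs four 8-bit vector entries on
         * 82598, or the Rx/Tx entries of two queues on 82599/X540; the index
         * and shift arithmetic below selects the byte lane to update.
         */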
4758         switch (hw->mac.type) {
4759
4760         case ixgbe_mac_82598EB:
4761                 if (type == -1)
4762                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4763                 else
4764                         entry += (type * 64);
4765                 index = (entry >> 2) & 0x1F;
4766                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4767                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4768                 ivar |= (vector << (8 * (entry & 0x3)));
4769                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4770                 break;
4771
4772         case ixgbe_mac_82599EB:
4773         case ixgbe_mac_X540:
4774                 if (type == -1) { /* MISC IVAR */
4775                         index = (entry & 1) * 8;
4776                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4777                         ivar &= ~(0xFF << index);
4778                         ivar |= (vector << index);
4779                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4780                 } else {        /* RX/TX IVARS */
4781                         index = (16 * (entry & 1)) + (8 * type);
4782                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4783                         ivar &= ~(0xFF << index);
4784                         ivar |= (vector << index);
4785                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4786                 }
4787
4788                 break;
4789
4790         default:
4791                 break;
4792         }
4793 }
4794
4795 void __rte_cold
4796 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4797 {
4798         uint16_t i, rx_using_sse;
4799         struct ixgbe_adapter *adapter = dev->data->dev_private;
4800
4801         /*
4802          * In order to allow Vector Rx there are a few configuration
4803          * conditions to be met and Rx Bulk Allocation should be allowed.
4804          */
4805         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4806             !adapter->rx_bulk_alloc_allowed ||
4807                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4808                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4809                                     "preconditions",
4810                              dev->data->port_id);
4811
4812                 adapter->rx_vec_allowed = false;
4813         }
4814
4815         /*
4816          * Initialize the appropriate LRO callback.
4817          *
4818          * If all queues satisfy the bulk allocation preconditions
4819          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4820          * Otherwise use a single allocation version.
4821          */
4822         if (dev->data->lro) {
4823                 if (adapter->rx_bulk_alloc_allowed) {
4824                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4825                                            "allocation version");
4826                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4827                 } else {
4828                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4829                                            "allocation version");
4830                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4831                 }
4832         } else if (dev->data->scattered_rx) {
4833                 /*
4834                  * Set the non-LRO scattered callback: there are Vector and
4835                  * single allocation versions.
4836                  */
4837                 if (adapter->rx_vec_allowed) {
4838                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4839                                             "callback (port=%d).",
4840                                      dev->data->port_id);
4841
4842                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4843                 } else if (adapter->rx_bulk_alloc_allowed) {
4844                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4845                                            "allocation callback (port=%d).",
4846                                      dev->data->port_id);
4847                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4848                 } else {
4849                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4850                                             "single allocation) "
4851                                             "Scattered Rx callback "
4852                                             "(port=%d).",
4853                                      dev->data->port_id);
4854
4855                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4856                 }
4857         /*
4858          * Below we set "simple" callbacks according to port/queues parameters.
4859          * If parameters allow we are going to choose between the following
4860          * callbacks:
4861          *    - Vector
4862          *    - Bulk Allocation
4863          *    - Single buffer allocation (the simplest one)
4864          */
4865         } else if (adapter->rx_vec_allowed) {
4866                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4867                                     "burst size no less than %d (port=%d).",
4868                              RTE_IXGBE_DESCS_PER_LOOP,
4869                              dev->data->port_id);
4870
4871                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4872         } else if (adapter->rx_bulk_alloc_allowed) {
4873                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4874                                     "satisfied. Rx Burst Bulk Alloc function "
4875                                     "will be used on port=%d.",
4876                              dev->data->port_id);
4877
4878                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4879         } else {
4880                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4881                                     "satisfied, or Scattered Rx is requested "
4882                                     "(port=%d).",
4883                              dev->data->port_id);
4884
4885                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4886         }
4887
4888         /* Propagate information about RX function choice through all queues. */
4889
4890         rx_using_sse =
4891                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4892                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4893
4894         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4895                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4896
4897                 rxq->rx_using_sse = rx_using_sse;
4898 #ifdef RTE_LIB_SECURITY
4899                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4900                                 DEV_RX_OFFLOAD_SECURITY);
4901 #endif
4902         }
4903 }
4904
4905 /**
4906  * ixgbe_set_rsc - configure RSC related port HW registers
4907  *
4908  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4909  * of 82599 Spec (x540 configuration is virtually the same).
4910  *
4911  * @dev port handle
4912  *
4913  * Returns 0 in case of success or a non-zero error code
4914  */
4915 static int
4916 ixgbe_set_rsc(struct rte_eth_dev *dev)
4917 {
4918         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4919         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4920         struct rte_eth_dev_info dev_info = { 0 };
4921         bool rsc_capable = false;
4922         uint16_t i;
4923         uint32_t rdrxctl;
4924         uint32_t rfctl;
4925
4926         /* Sanity check */
4927         dev->dev_ops->dev_infos_get(dev, &dev_info);
4928         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4929                 rsc_capable = true;
4930
4931         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4932                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4933                                    "support it");
4934                 return -EINVAL;
4935         }
4936
4937         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4938
4939         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4940              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4941                 /*
4942                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4943                  * RSC configuration requires HW CRC stripping to be
4944                  * enabled. If the user requested both HW CRC stripping
4945                  * off and RSC on - return an error.
4946                  */
4947                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4948                                     "is disabled");
4949                 return -EINVAL;
4950         }
4951
4952         /* RFCTL configuration  */
4953         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4954         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4955                 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4956         else
4957                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4958         /* disable NFS filtering */
4959         rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4960         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4961
4962         /* If LRO hasn't been requested - we are done here. */
4963         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4964                 return 0;
4965
4966         /* Set RDRXCTL.RSCACKC bit */
4967         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4968         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4969         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4970
4971         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4972         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4973                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4974                 uint32_t srrctl =
4975                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4976                 uint32_t rscctl =
4977                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4978                 uint32_t psrtype =
4979                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4980                 uint32_t eitr =
4981                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4982
4983                 /*
4984                  * ixgbe PMD doesn't support header-split at the moment.
4985                  *
4986                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4987                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4988                  * should be configured even if header split is not
4989                  * enabled. We configure it to 128 bytes, following the
4990                  * recommendation in the spec.
4991                  */
4992                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4993                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4994                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4995
4996                 /*
4997                  * TODO: Consider setting the Receive Descriptor Minimum
4998                  * Threshold Size for an RSC case. This is not an obviously
4999                  * beneficial option, but one worth considering...
5000                  */
5001
5002                 rscctl |= IXGBE_RSCCTL_RSCEN;
5003                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
5004                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
5005
5006                 /*
5007                  * RSC: Set ITR interval corresponding to 2K ints/s.
5008                  *
5009                  * Full-sized RSC aggregations for a 10Gb/s link will
5010                  * arrive at about a 20K aggregations/s rate.
5011                  *
5012                  * A 2K ints/s rate will cause only 10% of the
5013                  * aggregations to be closed due to interrupt timer
5014                  * expiration when streaming at wire speed.
5015                  *
5016                  * For a sparse streaming case this setting will yield
5017                  * at most 500us latency for a single RSC aggregation.
5018                  */
5019                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5020                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5021                 eitr |= IXGBE_EITR_CNT_WDIS;
5022
5023                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5024                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5025                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5026                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5027
5028                 /*
5029                  * RSC requires the mapping of the queue to the
5030                  * interrupt vector.
5031                  */
5032                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5033         }
5034
5035         dev->data->lro = 1;
5036
5037         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5038
5039         return 0;
5040 }
5041
5042 /*
5043  * Initializes Receive Unit.
5044  */
5045 int __rte_cold
5046 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5047 {
5048         struct ixgbe_hw     *hw;
5049         struct ixgbe_rx_queue *rxq;
5050         uint64_t bus_addr;
5051         uint32_t rxctrl;
5052         uint32_t fctrl;
5053         uint32_t hlreg0;
5054         uint32_t maxfrs;
5055         uint32_t srrctl;
5056         uint32_t rdrxctl;
5057         uint32_t rxcsum;
5058         uint16_t buf_size;
5059         uint16_t i;
5060         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5061         int rc;
5062
5063         PMD_INIT_FUNC_TRACE();
5064         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5065
5066         /*
5067          * Make sure receives are disabled while setting
5068          * up the RX context (registers, descriptor rings, etc.).
5069          */
5070         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5071         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5072
5073         /* Enable receipt of broadcast frames */
5074         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5075         fctrl |= IXGBE_FCTRL_BAM;
5076         fctrl |= IXGBE_FCTRL_DPF;
5077         fctrl |= IXGBE_FCTRL_PMCF;
5078         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5079
5080         /*
5081          * Configure CRC stripping, if any.
5082          */
5083         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5084         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5085                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5086         else
5087                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5088
5089         /*
5090          * Configure jumbo frame support, if any.
5091          */
5092         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5093                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
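                /*
                 * MAXFRS keeps the maximum frame size in its upper 16 bits;
                 * preserve the lower half of the register and program the
                 * configured limit.
                 */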
5094                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5095                 maxfrs &= 0x0000FFFF;
5096                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5097                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5098         } else
5099                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5100
5101         /*
5102          * If loopback mode is configured, set LPBK bit.
5103          */
5104         if (dev->data->dev_conf.lpbk_mode != 0) {
5105                 rc = ixgbe_check_supported_loopback_mode(dev);
5106                 if (rc < 0) {
5107                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5108                         return rc;
5109                 }
5110                 hlreg0 |= IXGBE_HLREG0_LPBK;
5111         } else {
5112                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5113         }
5114
5115         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5116
5117         /*
5118          * Assume no header split and no VLAN strip support
5119          * on any Rx queue first.
5120          */
5121         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5122         /* Setup RX queues */
5123         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5124                 rxq = dev->data->rx_queues[i];
5125
5126                 /*
5127                  * Reset crc_len in case it was changed after queue setup by a
5128                  * call to configure.
5129                  */
5130                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5131                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5132                 else
5133                         rxq->crc_len = 0;
5134
5135                 /* Setup the Base and Length of the Rx Descriptor Rings */
5136                 bus_addr = rxq->rx_ring_phys_addr;
5137                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5138                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5139                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5140                                 (uint32_t)(bus_addr >> 32));
5141                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5142                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5143                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5144                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5145
5146                 /* Configure the SRRCTL register */
5147                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5148
5149                 /* Set whether packets are dropped when no descriptors are available */
5150                 if (rxq->drop_en)
5151                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5152
5153                 /*
5154                  * Configure the RX buffer size in the BSIZEPACKET field of
5155                  * the SRRCTL register of the queue.
5156                  * The value is in 1 KB resolution. Valid values can be from
5157                  * 1 KB to 16 KB.
5158                  */
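                /*
                 * For example, a pool created with the default
                 * RTE_MBUF_DEFAULT_BUF_SIZE leaves 2048 bytes of packet room,
                 * so BSIZEPACKET below is programmed to 2, i.e. 2 KB buffers.
                 */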
5159                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5160                         RTE_PKTMBUF_HEADROOM);
5161                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5162                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5163
5164                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5165
5166                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5167                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5168
5169                 /* Add the length of two VLAN tags to the max frame size to support dual VLAN */
5170                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5171                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5172                         dev->data->scattered_rx = 1;
5173                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5174                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5175         }
5176
5177         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5178                 dev->data->scattered_rx = 1;
5179
5180         /*
5181          * Device configured with multiple RX queues.
5182          */
5183         ixgbe_dev_mq_rx_configure(dev);
5184
5185         /*
5186          * Setup the Checksum Register.
5187          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5188          * Enable IP/L4 checksum computation by hardware if requested to do so.
5189          */
5190         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5191         rxcsum |= IXGBE_RXCSUM_PCSD;
5192         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5193                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5194         else
5195                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5196
5197         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5198
5199         if (hw->mac.type == ixgbe_mac_82599EB ||
5200             hw->mac.type == ixgbe_mac_X540) {
5201                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5202                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5203                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5204                 else
5205                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5206                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5207                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5208         }
5209
5210         rc = ixgbe_set_rsc(dev);
5211         if (rc)
5212                 return rc;
5213
5214         ixgbe_set_rx_function(dev);
5215
5216         return 0;
5217 }
5218
5219 /*
5220  * Initializes Transmit Unit.
5221  */
5222 void __rte_cold
5223 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5224 {
5225         struct ixgbe_hw     *hw;
5226         struct ixgbe_tx_queue *txq;
5227         uint64_t bus_addr;
5228         uint32_t hlreg0;
5229         uint32_t txctrl;
5230         uint16_t i;
5231
5232         PMD_INIT_FUNC_TRACE();
5233         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5234
5235         /* Enable TX CRC (checksum offload requirement) and hw padding
5236          * (TSO requirement)
5237          */
5238         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5239         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5240         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5241
5242         /* Setup the Base and Length of the Tx Descriptor Rings */
5243         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5244                 txq = dev->data->tx_queues[i];
5245
5246                 bus_addr = txq->tx_ring_phys_addr;
5247                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5248                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5249                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5250                                 (uint32_t)(bus_addr >> 32));
5251                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5252                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5253                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5254                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5255                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5256
5257                 /*
5258                  * Disable Tx Head Writeback RO bit, since this hoses
5259                  * bookkeeping if things aren't delivered in order.
5260                  */
5261                 switch (hw->mac.type) {
5262                 case ixgbe_mac_82598EB:
5263                         txctrl = IXGBE_READ_REG(hw,
5264                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5265                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5266                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5267                                         txctrl);
5268                         break;
5269
5270                 case ixgbe_mac_82599EB:
5271                 case ixgbe_mac_X540:
5272                 case ixgbe_mac_X550:
5273                 case ixgbe_mac_X550EM_x:
5274                 case ixgbe_mac_X550EM_a:
5275                 default:
5276                         txctrl = IXGBE_READ_REG(hw,
5277                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5278                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5279                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5280                                         txctrl);
5281                         break;
5282                 }
5283         }
5284
5285         /* Device configured with multiple TX queues. */
5286         ixgbe_dev_mq_tx_configure(dev);
5287 }
5288
5289 /*
5290  * Check if requested loopback mode is supported
5291  */
5292 int
5293 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5294 {
5295         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5296
5297         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5298                 if (hw->mac.type == ixgbe_mac_82599EB ||
5299                      hw->mac.type == ixgbe_mac_X540 ||
5300                      hw->mac.type == ixgbe_mac_X550 ||
5301                      hw->mac.type == ixgbe_mac_X550EM_x ||
5302                      hw->mac.type == ixgbe_mac_X550EM_a)
5303                         return 0;
5304
5305         return -ENOTSUP;
5306 }
5307
5308 /*
5309  * Set up link for 82599 loopback mode Tx->Rx.
5310  */
5311 static inline void __rte_cold
5312 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5313 {
5314         PMD_INIT_FUNC_TRACE();
5315
5316         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5317                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5318                                 IXGBE_SUCCESS) {
5319                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5320                         /* ignore error */
5321                         return;
5322                 }
5323         }
5324
5325         /* Restart link */
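        /*
         * FLU forces the link up and the LMS field selects 10G with
         * auto-negotiation disabled, which is what Tx->Rx loopback needs.
         */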
5326         IXGBE_WRITE_REG(hw,
5327                         IXGBE_AUTOC,
5328                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5329         ixgbe_reset_pipeline_82599(hw);
5330
5331         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5332         msec_delay(50);
5333 }
5334
5335
5336 /*
5337  * Start Transmit and Receive Units.
5338  */
5339 int __rte_cold
5340 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5341 {
5342         struct ixgbe_hw     *hw;
5343         struct ixgbe_tx_queue *txq;
5344         struct ixgbe_rx_queue *rxq;
5345         uint32_t txdctl;
5346         uint32_t dmatxctl;
5347         uint32_t rxctrl;
5348         uint16_t i;
5349         int ret = 0;
5350
5351         PMD_INIT_FUNC_TRACE();
5352         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5353
5354         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5355                 txq = dev->data->tx_queues[i];
5356                 /* Setup Transmit Threshold Registers */
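                /*
                 * TXDCTL packs the prefetch (PTHRESH), host (HTHRESH) and
                 * write-back (WTHRESH) thresholds into bits 6:0, 14:8 and
                 * 22:16 respectively, matching the shifts below.
                 */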
5357                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5358                 txdctl |= txq->pthresh & 0x7F;
5359                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5360                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5361                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5362         }
5363
5364         if (hw->mac.type != ixgbe_mac_82598EB) {
5365                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5366                 dmatxctl |= IXGBE_DMATXCTL_TE;
5367                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5368         }
5369
5370         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5371                 txq = dev->data->tx_queues[i];
5372                 if (!txq->tx_deferred_start) {
5373                         ret = ixgbe_dev_tx_queue_start(dev, i);
5374                         if (ret < 0)
5375                                 return ret;
5376                 }
5377         }
5378
5379         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5380                 rxq = dev->data->rx_queues[i];
5381                 if (!rxq->rx_deferred_start) {
5382                         ret = ixgbe_dev_rx_queue_start(dev, i);
5383                         if (ret < 0)
5384                                 return ret;
5385                 }
5386         }
5387
5388         /* Enable Receive engine */
5389         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5390         if (hw->mac.type == ixgbe_mac_82598EB)
5391                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5392         rxctrl |= IXGBE_RXCTRL_RXEN;
5393         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5394
5395         /* If loopback mode is enabled, set up the link accordingly */
5396         if (dev->data->dev_conf.lpbk_mode != 0) {
5397                 if (hw->mac.type == ixgbe_mac_82599EB)
5398                         ixgbe_setup_loopback_link_82599(hw);
5399                 else if (hw->mac.type == ixgbe_mac_X540 ||
5400                      hw->mac.type == ixgbe_mac_X550 ||
5401                      hw->mac.type == ixgbe_mac_X550EM_x ||
5402                      hw->mac.type == ixgbe_mac_X550EM_a)
5403                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5404         }
5405
5406 #ifdef RTE_LIB_SECURITY
5407         if ((dev->data->dev_conf.rxmode.offloads &
5408                         DEV_RX_OFFLOAD_SECURITY) ||
5409                 (dev->data->dev_conf.txmode.offloads &
5410                         DEV_TX_OFFLOAD_SECURITY)) {
5411                 ret = ixgbe_crypto_enable_ipsec(dev);
5412                 if (ret != 0) {
5413                         PMD_DRV_LOG(ERR,
5414                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5415                                     ret);
5416                         return ret;
5417                 }
5418         }
5419 #endif
5420
5421         return 0;
5422 }
5423
5424 /*
5425  * Start Receive Units for specified queue.
5426  */
5427 int __rte_cold
5428 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5429 {
5430         struct ixgbe_hw     *hw;
5431         struct ixgbe_rx_queue *rxq;
5432         uint32_t rxdctl;
5433         int poll_ms;
5434
5435         PMD_INIT_FUNC_TRACE();
5436         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5437
5438         rxq = dev->data->rx_queues[rx_queue_id];
5439
5440         /* Allocate buffers for descriptor rings */
5441         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5442                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5443                              rx_queue_id);
5444                 return -1;
5445         }
5446         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5447         rxdctl |= IXGBE_RXDCTL_ENABLE;
5448         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5449
5450         /* Wait until RX Enable ready */
5451         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5452         do {
5453                 rte_delay_ms(1);
5454                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5455         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5456         if (!poll_ms)
5457                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5458         rte_wmb();
5459         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5460         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5461         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5462
5463         return 0;
5464 }
5465
5466 /*
5467  * Stop Receive Units for specified queue.
5468  */
5469 int __rte_cold
5470 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5471 {
5472         struct ixgbe_hw     *hw;
5473         struct ixgbe_adapter *adapter = dev->data->dev_private;
5474         struct ixgbe_rx_queue *rxq;
5475         uint32_t rxdctl;
5476         int poll_ms;
5477
5478         PMD_INIT_FUNC_TRACE();
5479         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5480
5481         rxq = dev->data->rx_queues[rx_queue_id];
5482
5483         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5484         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5485         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5486
5487         /* Wait until RX Enable bit clear */
5488         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5489         do {
5490                 rte_delay_ms(1);
5491                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5492         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5493         if (!poll_ms)
5494                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5495
5496         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5497
5498         ixgbe_rx_queue_release_mbufs(rxq);
5499         ixgbe_reset_rx_queue(adapter, rxq);
5500         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5501
5502         return 0;
5503 }
5504
5505
5506 /*
5507  * Start Transmit Units for specified queue.
5508  */
5509 int __rte_cold
5510 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5511 {
5512         struct ixgbe_hw     *hw;
5513         struct ixgbe_tx_queue *txq;
5514         uint32_t txdctl;
5515         int poll_ms;
5516
5517         PMD_INIT_FUNC_TRACE();
5518         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5519
5520         txq = dev->data->tx_queues[tx_queue_id];
5521         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5522         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5523         txdctl |= IXGBE_TXDCTL_ENABLE;
5524         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5525
5526         /* Wait until TX Enable ready */
5527         if (hw->mac.type == ixgbe_mac_82599EB) {
5528                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5529                 do {
5530                         rte_delay_ms(1);
5531                         txdctl = IXGBE_READ_REG(hw,
5532                                 IXGBE_TXDCTL(txq->reg_idx));
5533                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5534                 if (!poll_ms)
5535                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5536                                 tx_queue_id);
5537         }
5538         rte_wmb();
5539         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5540         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5541
5542         return 0;
5543 }
5544
5545 /*
5546  * Stop Transmit Units for specified queue.
5547  */
5548 int __rte_cold
5549 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5550 {
5551         struct ixgbe_hw     *hw;
5552         struct ixgbe_tx_queue *txq;
5553         uint32_t txdctl;
5554         uint32_t txtdh, txtdt;
5555         int poll_ms;
5556
5557         PMD_INIT_FUNC_TRACE();
5558         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5559
5560         txq = dev->data->tx_queues[tx_queue_id];
5561
5562         /* Wait until TX queue is empty */
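        /*
         * The queue is drained once the head pointer (TDH) has caught up
         * with the tail pointer (TDT), which the loop below polls for.
         */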
5563         if (hw->mac.type == ixgbe_mac_82599EB) {
5564                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5565                 do {
5566                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5567                         txtdh = IXGBE_READ_REG(hw,
5568                                                IXGBE_TDH(txq->reg_idx));
5569                         txtdt = IXGBE_READ_REG(hw,
5570                                                IXGBE_TDT(txq->reg_idx));
5571                 } while (--poll_ms && (txtdh != txtdt));
5572                 if (!poll_ms)
5573                         PMD_INIT_LOG(ERR,
5574                                 "Tx Queue %d is not empty when stopping.",
5575                                 tx_queue_id);
5576         }
5577
5578         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5579         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5580         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5581
5582         /* Wait until TX Enable bit clear */
5583         if (hw->mac.type == ixgbe_mac_82599EB) {
5584                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5585                 do {
5586                         rte_delay_ms(1);
5587                         txdctl = IXGBE_READ_REG(hw,
5588                                                 IXGBE_TXDCTL(txq->reg_idx));
5589                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5590                 if (!poll_ms)
5591                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5592                                 tx_queue_id);
5593         }
5594
5595         if (txq->ops != NULL) {
5596                 txq->ops->release_mbufs(txq);
5597                 txq->ops->reset(txq);
5598         }
5599         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5600
5601         return 0;
5602 }
5603
5604 void
5605 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5606         struct rte_eth_rxq_info *qinfo)
5607 {
5608         struct ixgbe_rx_queue *rxq;
5609
5610         rxq = dev->data->rx_queues[queue_id];
5611
5612         qinfo->mp = rxq->mb_pool;
5613         qinfo->scattered_rx = dev->data->scattered_rx;
5614         qinfo->nb_desc = rxq->nb_rx_desc;
5615
5616         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5617         qinfo->conf.rx_drop_en = rxq->drop_en;
5618         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5619         qinfo->conf.offloads = rxq->offloads;
5620 }
5621
5622 void
5623 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5624         struct rte_eth_txq_info *qinfo)
5625 {
5626         struct ixgbe_tx_queue *txq;
5627
5628         txq = dev->data->tx_queues[queue_id];
5629
5630         qinfo->nb_desc = txq->nb_tx_desc;
5631
5632         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5633         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5634         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5635
5636         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5637         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5638         qinfo->conf.offloads = txq->offloads;
5639         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5640 }
5641
5642 /*
5643  * [VF] Initializes Receive Unit.
5644  */
5645 int __rte_cold
5646 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5647 {
5648         struct ixgbe_hw     *hw;
5649         struct ixgbe_rx_queue *rxq;
5650         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5651         uint64_t bus_addr;
5652         uint32_t srrctl, psrtype = 0;
5653         uint16_t buf_size;
5654         uint16_t i;
5655         int ret;
5656
5657         PMD_INIT_FUNC_TRACE();
5658         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5659
5660         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5661                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5662                         "it should be power of 2");
5663                 return -1;
5664         }
5665
5666         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5667                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5668                         "it should be equal to or less than %d",
5669                         hw->mac.max_rx_queues);
5670                 return -1;
5671         }
5672
5673         /*
5674          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5675          * disables VF receipt of packets if the PF MTU is > 1500.
5676          * This is done to deal with an 82599 limitation that requires
5677          * the PF and all VFs to share the same MTU.
5678          * The PF driver then re-enables VF receipt of packets when the
5679          * VF driver issues an IXGBE_VF_SET_LPE request.
5680          * In the meantime, the VF device cannot be used, even if the VF driver
5681          * and the Guest VM network stack are ready to accept packets with a
5682          * size up to the PF MTU.
5683          * As a workaround for this PF behaviour, force the call to
5684          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5685          * VF packet reception works in all cases.
5686          */
5687         if (ixgbevf_rlpml_set_vf(hw,
5688             (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) {
5689                 PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
5690                              dev->data->dev_conf.rxmode.max_rx_pkt_len);
5691                 return -EINVAL;
5692         }
5693
5694         /*
5695          * Assume no header split and no VLAN strip support
5696          * on any Rx queue first.
5697          */
5698         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5699         /* Setup RX queues */
5700         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5701                 rxq = dev->data->rx_queues[i];
5702
5703                 /* Allocate buffers for descriptor rings */
5704                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5705                 if (ret)
5706                         return ret;
5707
5708                 /* Setup the Base and Length of the Rx Descriptor Rings */
5709                 bus_addr = rxq->rx_ring_phys_addr;
5710
5711                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5712                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5713                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5714                                 (uint32_t)(bus_addr >> 32));
5715                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5716                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5717                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5718                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5719
5720
5721                 /* Configure the SRRCTL register */
5722                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5723
5724                 /* Set whether packets are dropped when no descriptors are available */
5725                 if (rxq->drop_en)
5726                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5727
5728                 /*
5729                  * Configure the RX buffer size in the BSIZEPACKET field of
5730                  * the SRRCTL register of the queue.
5731                  * The value is in 1 KB resolution. Valid values can be from
5732                  * 1 KB to 16 KB.
5733                  */
5734                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5735                         RTE_PKTMBUF_HEADROOM);
5736                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5737                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5738
5739                 /*
5740                  * VF modification to write virtual function SRRCTL register
5741                  */
5742                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5743
5744                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5745                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5746
5747                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5748                     /* Add the length of two VLAN tags to support dual VLAN */
5749                     (rxmode->max_rx_pkt_len +
5750                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5751                         if (!dev->data->scattered_rx)
5752                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5753                         dev->data->scattered_rx = 1;
5754                 }
5755
5756                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5757                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5758         }
5759
5760         /* Set RQPL for VF RSS according to the number of Rx queues */
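        /*
         * RQPL holds an encoded queue count: shifting nb_rx_queues right
         * by one maps 1, 2 and 4 queues to the encodings 0, 1 and 2.
         */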
5761         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5762                 IXGBE_PSRTYPE_RQPL_SHIFT;
5763         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5764
5765         ixgbe_set_rx_function(dev);
5766
5767         return 0;
5768 }
5769
5770 /*
5771  * [VF] Initializes Transmit Unit.
5772  */
5773 void __rte_cold
5774 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5775 {
5776         struct ixgbe_hw     *hw;
5777         struct ixgbe_tx_queue *txq;
5778         uint64_t bus_addr;
5779         uint32_t txctrl;
5780         uint16_t i;
5781
5782         PMD_INIT_FUNC_TRACE();
5783         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5784
5785         /* Setup the Base and Length of the Tx Descriptor Rings */
5786         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5787                 txq = dev->data->tx_queues[i];
5788                 bus_addr = txq->tx_ring_phys_addr;
5789                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5790                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5791                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5792                                 (uint32_t)(bus_addr >> 32));
5793                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5794                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5795                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5796                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5797                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5798
5799                 /*
5800                  * Disable Tx Head Writeback RO bit, since this hoses
5801                  * bookkeeping if things aren't delivered in order.
5802                  */
5803                 txctrl = IXGBE_READ_REG(hw,
5804                                 IXGBE_VFDCA_TXCTRL(i));
5805                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5806                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5807                                 txctrl);
5808         }
5809 }
5810
5811 /*
5812  * [VF] Start Transmit and Receive Units.
5813  */
5814 void __rte_cold
5815 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5816 {
5817         struct ixgbe_hw     *hw;
5818         struct ixgbe_tx_queue *txq;
5819         struct ixgbe_rx_queue *rxq;
5820         uint32_t txdctl;
5821         uint32_t rxdctl;
5822         uint16_t i;
5823         int poll_ms;
5824
5825         PMD_INIT_FUNC_TRACE();
5826         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5827
5828         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5829                 txq = dev->data->tx_queues[i];
5830                 /* Setup Transmit Threshold Registers */
5831                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5832                 txdctl |= txq->pthresh & 0x7F;
5833                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5834                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5835                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5836         }
5837
5838         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5839
5840                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5841                 txdctl |= IXGBE_TXDCTL_ENABLE;
5842                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5843
5844                 poll_ms = 10;
5845                 /* Wait until TX Enable ready */
5846                 do {
5847                         rte_delay_ms(1);
5848                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5849                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5850                 if (!poll_ms)
5851                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5852         }
5853         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5854
5855                 rxq = dev->data->rx_queues[i];
5856
5857                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5858                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5859                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5860
5861                 /* Wait until RX Enable ready */
5862                 poll_ms = 10;
5863                 do {
5864                         rte_delay_ms(1);
5865                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5866                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5867                 if (!poll_ms)
5868                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5869                 rte_wmb();
5870                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5871
5872         }
5873 }
5874
5875 int
5876 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5877                     const struct rte_flow_action_rss *in)
5878 {
5879         if (in->key_len > RTE_DIM(out->key) ||
5880             in->queue_num > RTE_DIM(out->queue))
5881                 return -EINVAL;
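        /*
         * Deep-copy the key and the queue list so the stored configuration
         * does not keep pointers into caller-owned memory.
         */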
5882         out->conf = (struct rte_flow_action_rss){
5883                 .func = in->func,
5884                 .level = in->level,
5885                 .types = in->types,
5886                 .key_len = in->key_len,
5887                 .queue_num = in->queue_num,
5888                 .key = memcpy(out->key, in->key, in->key_len),
5889                 .queue = memcpy(out->queue, in->queue,
5890                                 sizeof(*in->queue) * in->queue_num),
5891         };
5892         return 0;
5893 }
5894
5895 int
5896 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5897                       const struct rte_flow_action_rss *with)
5898 {
5899         return (comp->func == with->func &&
5900                 comp->level == with->level &&
5901                 comp->types == with->types &&
5902                 comp->key_len == with->key_len &&
5903                 comp->queue_num == with->queue_num &&
5904                 !memcmp(comp->key, with->key, with->key_len) &&
5905                 !memcmp(comp->queue, with->queue,
5906                         sizeof(*with->queue) * with->queue_num));
5907 }
5908
5909 int
5910 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5911                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5912 {
5913         struct ixgbe_hw *hw;
5914         uint32_t reta;
5915         uint16_t i;
5916         uint16_t j;
5917         uint16_t sp_reta_size;
5918         uint32_t reta_reg;
5919         struct rte_eth_rss_conf rss_conf = {
5920                 .rss_key = conf->conf.key_len ?
5921                         (void *)(uintptr_t)conf->conf.key : NULL,
5922                 .rss_key_len = conf->conf.key_len,
5923                 .rss_hf = conf->conf.types,
5924         };
5925         struct ixgbe_filter_info *filter_info =
5926                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5927
5928         PMD_INIT_FUNC_TRACE();
5929         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5930
5931         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
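        /*
         * The RETA size is MAC specific, e.g. the X550 family exposes a
         * larger redirection table than 82599/X540.
         */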
5932
5933         if (!add) {
5934                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5935                                           &conf->conf)) {
5936                         ixgbe_rss_disable(dev);
5937                         memset(&filter_info->rss_info, 0,
5938                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5939                         return 0;
5940                 }
5941                 return -EINVAL;
5942         }
5943
5944         if (filter_info->rss_info.conf.queue_num)
5945                 return -EINVAL;
5946         /* Fill in the redirection table.
5947          * The byte-swap is needed because NIC registers are in
5948          * little-endian order.
5949          */
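        /*
         * Each 32-bit RETA register packs four one-byte queue indices,
         * hence a register write on every fourth entry below.
         */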
5950         reta = 0;
5951         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5952                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5953
5954                 if (j == conf->conf.queue_num)
5955                         j = 0;
5956                 reta = (reta << 8) | conf->conf.queue[j];
5957                 if ((i & 3) == 3)
5958                         IXGBE_WRITE_REG(hw, reta_reg,
5959                                         rte_bswap32(reta));
5960         }
5961
5962         /* Configure the RSS key and the RSS protocols used to compute
5963          * the RSS hash of input packets.
5964          */
5965         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5966                 ixgbe_rss_disable(dev);
5967                 return 0;
5968         }
5969         if (rss_conf.rss_key == NULL)
5970                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5971         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5972
5973         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5974                 return -EINVAL;
5975
5976         return 0;
5977 }
5978
5979 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
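/*
 * The condition check and setup stubs return failure so that the scalar
 * Rx/Tx paths are selected; the burst stubs are therefore never called.
 */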
5980 #if defined(RTE_ARCH_PPC_64)
5981 int
5982 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5983 {
5984         return -1;
5985 }
5986
5987 uint16_t
5988 ixgbe_recv_pkts_vec(
5989         void __rte_unused *rx_queue,
5990         struct rte_mbuf __rte_unused **rx_pkts,
5991         uint16_t __rte_unused nb_pkts)
5992 {
5993         return 0;
5994 }
5995
5996 uint16_t
5997 ixgbe_recv_scattered_pkts_vec(
5998         void __rte_unused *rx_queue,
5999         struct rte_mbuf __rte_unused **rx_pkts,
6000         uint16_t __rte_unused nb_pkts)
6001 {
6002         return 0;
6003 }
6004
6005 int
6006 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
6007 {
6008         return -1;
6009 }
6010
6011 uint16_t
6012 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
6013                 struct rte_mbuf __rte_unused **tx_pkts,
6014                 uint16_t __rte_unused nb_pkts)
6015 {
6016         return 0;
6017 }
6018
6019 int
6020 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
6021 {
6022         return -1;
6023 }
6024
6025 void
6026 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6027 {
6028         return;
6029 }
6030 #endif