[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask to indicate which bits are required for building the TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (                  \
63                 PKT_TX_OUTER_IPV6 |              \
64                 PKT_TX_OUTER_IPV4 |              \
65                 PKT_TX_IPV6 |                    \
66                 PKT_TX_IPV4 |                    \
67                 PKT_TX_VLAN_PKT |                \
68                 PKT_TX_IP_CKSUM |                \
69                 PKT_TX_L4_MASK |                 \
70                 PKT_TX_TCP_SEG |                 \
71                 PKT_TX_MACSEC |                  \
72                 PKT_TX_OUTER_IP_CKSUM |          \
73                 PKT_TX_SEC_OFFLOAD |     \
74                 IXGBE_TX_IEEE1588_TMST)
75
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
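
/*
 * Illustrative sketch (not part of the driver): any ol_flags bit that is in
 * PKT_TX_OFFLOAD_MASK but not in IXGBE_TX_OFFLOAD_MASK survives the XOR
 * above, so AND-ing a packet's ol_flags with IXGBE_TX_OFFLOAD_NOTSUP_MASK
 * is non-zero exactly when an unsupported offload is requested (this is how
 * ixgbe_prep_pkts() below uses it).  The helper name is hypothetical.
 */
static inline int
example_tx_offload_supported(uint64_t ol_flags)
{
        /* returns 1 when every requested offload is supported by this PMD */
        return (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) == 0;
}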
78
79 #if 1
80 #define RTE_PMD_USE_PREFETCH
81 #endif
82
83 #ifdef RTE_PMD_USE_PREFETCH
84 /*
85  * Prefetch a cache line into all cache levels.
86  */
87 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
88 #else
89 #define rte_ixgbe_prefetch(p)   do {} while (0)
90 #endif
91
92 /*********************************************************************
93  *
94  *  TX functions
95  *
96  **********************************************************************/
97
98 /*
99  * Check for descriptors with their DD bit set and free mbufs.
100  * Return the total number of buffers freed.
101  */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
104 {
105         struct ixgbe_tx_entry *txep;
106         uint32_t status;
107         int i, nb_free = 0;
108         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
109
110         /* check DD bit on threshold descriptor */
111         status = txq->tx_ring[txq->tx_next_dd].wb.status;
112         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
113                 return 0;
114
115         /*
116          * first buffer to free from S/W ring is at index
117          * tx_next_dd - (tx_rs_thresh-1)
118          */
119         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
120
121         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122                 /* free buffers one at a time */
123                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
124                 txep->mbuf = NULL;
125
126                 if (unlikely(m == NULL))
127                         continue;
128
129                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130                     (nb_free > 0 && m->pool != free[0]->pool)) {
131                         rte_mempool_put_bulk(free[0]->pool,
132                                              (void **)free, nb_free);
133                         nb_free = 0;
134                 }
135
136                 free[nb_free++] = m;
137         }
138
139         if (nb_free > 0)
140                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
141
142         /* buffers were freed, update counters */
143         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
145         if (txq->tx_next_dd >= txq->nb_tx_desc)
146                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
147
148         return txq->tx_rs_thresh;
149 }
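
/*
 * Illustrative sketch (not part of the driver): the tx_next_dd bookkeeping
 * done at the end of ixgbe_tx_free_bufs() in isolation.  Because
 * tx_rs_thresh divides the ring size, the threshold descriptor always sits
 * at index k*tx_rs_thresh - 1, so after a successful free it advances by
 * tx_rs_thresh and wraps back to tx_rs_thresh - 1.  Function name is
 * hypothetical.
 */
static inline uint16_t
example_advance_next_dd(uint16_t tx_next_dd, uint16_t tx_rs_thresh,
                        uint16_t nb_tx_desc)
{
        /* e.g. 512 descriptors, thresh 32: ... 479 -> 511 -> 31 -> 63 ... */
        tx_next_dd = (uint16_t)(tx_next_dd + tx_rs_thresh);
        if (tx_next_dd >= nb_tx_desc)
                tx_next_dd = (uint16_t)(tx_rs_thresh - 1);
        return tx_next_dd;
}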
150
151 /* Populate 4 descriptors with data from 4 mbufs */
152 static inline void
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
154 {
155         uint64_t buf_dma_addr;
156         uint32_t pkt_len;
157         int i;
158
159         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161                 pkt_len = (*pkts)->data_len;
162
163                 /* write data to descriptor */
164                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
165
166                 txdp->read.cmd_type_len =
167                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
168
169                 txdp->read.olinfo_status =
170                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
171
172                 rte_prefetch0(&(*pkts)->pool);
173         }
174 }
175
176 /* Populate 1 descriptor with data from 1 mbuf */
177 static inline void
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
179 {
180         uint64_t buf_dma_addr;
181         uint32_t pkt_len;
182
183         buf_dma_addr = rte_mbuf_data_iova(*pkts);
184         pkt_len = (*pkts)->data_len;
185
186         /* write data to descriptor */
187         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188         txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190         txdp->read.olinfo_status =
191                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192         rte_prefetch0(&(*pkts)->pool);
193 }
194
195 /*
196  * Fill H/W descriptor ring with mbuf data.
197  * Copy mbuf pointers to the S/W ring.
198  */
199 static inline void
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
201                       uint16_t nb_pkts)
202 {
203         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205         const int N_PER_LOOP = 4;
206         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207         int mainpart, leftover;
208         int i, j;
209
210         /*
211          * Process most of the packets in chunks of N pkts.  Any
212          * leftover packets will get processed one at a time.
213          */
214         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
216         for (i = 0; i < mainpart; i += N_PER_LOOP) {
217                 /* Copy N mbuf pointers to the S/W ring */
218                 for (j = 0; j < N_PER_LOOP; ++j) {
219                         (txep + i + j)->mbuf = *(pkts + i + j);
220                 }
221                 tx4(txdp + i, pkts + i);
222         }
223
224         if (unlikely(leftover > 0)) {
225                 for (i = 0; i < leftover; ++i) {
226                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227                         tx1(txdp + mainpart + i, pkts + mainpart + i);
228                 }
229         }
230 }
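
/*
 * Illustrative sketch (not part of the driver): the mainpart/leftover split
 * above is plain mask arithmetic with N_PER_LOOP == 4.  For example,
 * nb_pkts = 11 gives mainpart = 8 (two tx4() calls) and leftover = 3
 * (three tx1() calls).  Function name is hypothetical.
 */
static inline void
example_split_burst(uint16_t nb_pkts, uint16_t *mainpart, uint16_t *leftover)
{
        *mainpart = (uint16_t)(nb_pkts & ~(uint16_t)0x3);
        *leftover = (uint16_t)(nb_pkts & 0x3);
}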
231
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
234              uint16_t nb_pkts)
235 {
236         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
238         uint16_t n = 0;
239
240         /*
241          * Begin scanning the H/W ring for done descriptors when the
242          * number of available descriptors drops below tx_free_thresh.  For
243          * each done descriptor, free the associated buffer.
244          */
245         if (txq->nb_tx_free < txq->tx_free_thresh)
246                 ixgbe_tx_free_bufs(txq);
247
248         /* Only use descriptors that are available */
249         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250         if (unlikely(nb_pkts == 0))
251                 return 0;
252
253         /* Use exactly nb_pkts descriptors */
254         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
255
256         /*
257          * At this point, we know there are enough descriptors in the
258          * ring to transmit all the packets.  This assumes that each
259          * mbuf contains a single segment, and that no offload is
260          * requested that would require a new context descriptor.
261          */
262
263         /*
264          * See if we're going to wrap-around. If so, handle the top
265          * of the descriptor ring first, then do the bottom.  If not,
266          * the processing looks just like the "bottom" part anyway...
267          */
268         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
271
272                 /*
273                  * We know that the last descriptor in the ring will need to
274                  * have its RS bit set because tx_rs_thresh has to be
275                  * a divisor of the ring size
276                  */
277                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
280
281                 txq->tx_tail = 0;
282         }
283
284         /* Fill H/W descriptor ring with mbuf data */
285         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
287
288         /*
289          * Determine if RS bit should be set
290          * This is what we actually want:
291          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292          * but instead of subtracting 1 and doing >=, we can just do
293          * greater than without subtracting.
294          */
295         if (txq->tx_tail > txq->tx_next_rs) {
296                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
299                                                 txq->tx_rs_thresh);
300                 if (txq->tx_next_rs >= txq->nb_tx_desc)
301                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302         }
303
304         /*
305          * Check for wrap-around. This would only happen if we used
306          * up to the last descriptor in the ring, no more, no less.
307          */
308         if (txq->tx_tail >= txq->nb_tx_desc)
309                 txq->tx_tail = 0;
310
311         /* update tail pointer */
312         rte_wmb();
313         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
314
315         return nb_pkts;
316 }
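
/*
 * Illustrative sketch (not part of the driver): the wrap-around handling in
 * tx_xmit_pkts() above.  When the burst would run past the end of the ring,
 * the first chunk fills the ring up to the last descriptor and the remainder
 * restarts at index 0.  Function name is hypothetical.
 */
static inline uint16_t
example_descs_before_wrap(uint16_t tx_tail, uint16_t nb_pkts,
                          uint16_t nb_tx_desc)
{
        /* number of descriptors to fill before wrapping; 0 means no wrap */
        if ((uint16_t)(tx_tail + nb_pkts) > nb_tx_desc)
                return (uint16_t)(nb_tx_desc - tx_tail);
        return 0;
}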
317
318 uint16_t
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
320                        uint16_t nb_pkts)
321 {
322         uint16_t nb_tx;
323
324         /* Transmit in a single call if the burst fits within TX_MAX_BURST pkts */
325         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
327
328         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
329         nb_tx = 0;
330         while (nb_pkts) {
331                 uint16_t ret, n;
332
333                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335                 nb_tx = (uint16_t)(nb_tx + ret);
336                 nb_pkts = (uint16_t)(nb_pkts - ret);
337                 if (ret < n)
338                         break;
339         }
340
341         return nb_tx;
342 }
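
/*
 * Illustrative sketch (not part of the driver): applications reach the burst
 * functions above through rte_eth_tx_burst().  A caller that wants the whole
 * burst on the wire typically retries the unsent tail, as sketched here with
 * hypothetical port/queue ids and function name.
 */
static inline uint16_t
example_app_tx_all(uint16_t port_id, uint16_t queue_id,
                   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = 0;

        while (sent < nb_pkts) {
                uint16_t n = rte_eth_tx_burst(port_id, queue_id,
                                              &pkts[sent], nb_pkts - sent);
                if (n == 0)
                        break;  /* TX ring full and nothing could be freed */
                sent += n;
        }
        return sent;
}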
343
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN_PKT) {
388                 tx_offload_mask.vlan_tci |= ~0;
389         }
390
391         /* check if TCP segmentation is required for this packet */
392         if (ol_flags & PKT_TX_TCP_SEG) {
393                 /* implies IP cksum in IPv4 */
394                 if (ol_flags & PKT_TX_IP_CKSUM)
395                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
398                 else
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
402
403                 tx_offload_mask.l2_len |= ~0;
404                 tx_offload_mask.l3_len |= ~0;
405                 tx_offload_mask.l4_len |= ~0;
406                 tx_offload_mask.tso_segsz |= ~0;
407                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409         } else { /* no TSO, check if hardware checksum is needed */
410                 if (ol_flags & PKT_TX_IP_CKSUM) {
411                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                 }
415
416                 switch (ol_flags & PKT_TX_L4_MASK) {
417                 case PKT_TX_UDP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
430                         tx_offload_mask.l2_len |= ~0;
431                         tx_offload_mask.l3_len |= ~0;
432                         break;
433                 case PKT_TX_SCTP_CKSUM:
434                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
438                         tx_offload_mask.l2_len |= ~0;
439                         tx_offload_mask.l3_len |= ~0;
440                         break;
441                 default:
442                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
444                         break;
445                 }
446         }
447
448         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449                 tx_offload_mask.outer_l2_len |= ~0;
450                 tx_offload_mask.outer_l3_len |= ~0;
451                 tx_offload_mask.l2_len |= ~0;
452                 seqnum_seed |= tx_offload.outer_l3_len
453                                << IXGBE_ADVTXD_OUTER_IPLEN;
454                 seqnum_seed |= tx_offload.l2_len
455                                << IXGBE_ADVTXD_TUNNEL_LEN;
456         }
457 #ifdef RTE_LIB_SECURITY
458         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459                 union ixgbe_crypto_tx_desc_md *md =
460                                 (union ixgbe_crypto_tx_desc_md *)mdata;
461                 seqnum_seed |=
462                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463                 type_tucmd_mlhl |= md->enc ?
464                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
466                 type_tucmd_mlhl |=
467                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468                 tx_offload_mask.sa_idx |= ~0;
469                 tx_offload_mask.sec_pad_len |= ~0;
470         }
471 #endif
472
473         txq->ctx_cache[ctx_idx].flags = ol_flags;
474         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
475                 tx_offload_mask.data[0] & tx_offload.data[0];
476         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
477                 tx_offload_mask.data[1] & tx_offload.data[1];
478         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
479
480         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481         vlan_macip_lens = tx_offload.l3_len;
482         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484                                     IXGBE_ADVTXD_MACLEN_SHIFT);
485         else
486                 vlan_macip_lens |= (tx_offload.l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
491         ctx_txd->seqnum_seed     = seqnum_seed;
492 }
493
494 /*
495  * Check which hardware context can be used. Use the existing match
496  * or create a new context descriptor.
497  */
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500                    union ixgbe_tx_offload tx_offload)
501 {
502         /* If it matches the currently used context */
503         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506                      & tx_offload.data[0])) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509                      & tx_offload.data[1]))))
510                 return txq->ctx_curr;
511
512         /* Otherwise, check for a match with the next context */
513         txq->ctx_curr ^= 1;
514         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517                      & tx_offload.data[0])) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520                      & tx_offload.data[1]))))
521                 return txq->ctx_curr;
522
523         /* Mismatch: a new context descriptor has to be built */
524         return IXGBE_CTX_NUM;
525 }
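
/*
 * Illustrative sketch (not part of the driver): how the return value above
 * is consumed in ixgbe_xmit_pkts() below.  A return of IXGBE_CTX_NUM means
 * neither of the two cached contexts matched, so a fresh context descriptor
 * has to be written into the ring ahead of the data descriptors.  Function
 * name is hypothetical.
 */
static inline uint32_t
example_ctx_decision(struct ixgbe_tx_queue *txq, uint64_t tx_ol_req,
                     union ixgbe_tx_offload tx_offload, uint32_t *new_ctx)
{
        uint32_t ctx = what_advctx_update(txq, tx_ol_req, tx_offload);

        *new_ctx = (ctx == IXGBE_CTX_NUM);      /* 1: build a context desc */
        return txq->ctx_curr;                   /* slot the packet references */
}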
526
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
529 {
530         uint32_t tmp = 0;
531
532         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534         if (ol_flags & PKT_TX_IP_CKSUM)
535                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
536         if (ol_flags & PKT_TX_TCP_SEG)
537                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
538         return tmp;
539 }
540
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
543 {
544         uint32_t cmdtype = 0;
545
546         if (ol_flags & PKT_TX_VLAN_PKT)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548         if (ol_flags & PKT_TX_TCP_SEG)
549                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552         if (ol_flags & PKT_TX_MACSEC)
553                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
554         return cmdtype;
555 }
556
557 /* Default RS bit threshold values */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH   32
560 #endif
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
563 #endif
564
565 /* Reset transmit descriptors after they have been used */
566 static inline int
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
568 {
569         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572         uint16_t nb_tx_desc = txq->nb_tx_desc;
573         uint16_t desc_to_clean_to;
574         uint16_t nb_tx_to_clean;
575         uint32_t status;
576
577         /* Determine the last descriptor needing to be cleaned */
578         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579         if (desc_to_clean_to >= nb_tx_desc)
580                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
581
582         /* Check to make sure the last descriptor to clean is done */
583         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584         status = txr[desc_to_clean_to].wb.status;
585         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
586                 PMD_TX_FREE_LOG(DEBUG,
587                                 "TX descriptor %4u is not done "
588                                 "(port=%d queue=%d)",
589                                 desc_to_clean_to,
590                                 txq->port_id, txq->queue_id);
591                 /* Failed to clean any descriptors, better luck next time */
592                 return -(1);
593         }
594
595         /* Figure out how many descriptors will be cleaned */
596         if (last_desc_cleaned > desc_to_clean_to)
597                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
598                                                         desc_to_clean_to);
599         else
600                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
601                                                 last_desc_cleaned);
602
603         PMD_TX_FREE_LOG(DEBUG,
604                         "Cleaning %4u TX descriptors: %4u to %4u "
605                         "(port=%d queue=%d)",
606                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607                         txq->port_id, txq->queue_id);
608
609         /*
610          * The last descriptor to clean is done, so that means all the
611          * descriptors from the last descriptor that was cleaned
612          * up to the last descriptor with the RS bit set
613          * are done. Only reset the threshold descriptor.
614          */
615         txr[desc_to_clean_to].wb.status = 0;
616
617         /* Update the txq to reflect the last descriptor that was cleaned */
618         txq->last_desc_cleaned = desc_to_clean_to;
619         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
620
621         /* No Error */
622         return 0;
623 }
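
/*
 * Illustrative sketch (not part of the driver): the circular distance
 * computed above when figuring out how many descriptors were cleaned.
 * With a 512-entry ring, last_cleaned = 500 and clean_to = 20 gives
 * (512 - 500) + 20 = 32 descriptors.  Function name is hypothetical.
 */
static inline uint16_t
example_ring_distance(uint16_t last_cleaned, uint16_t clean_to,
                      uint16_t nb_tx_desc)
{
        if (last_cleaned > clean_to)
                return (uint16_t)((nb_tx_desc - last_cleaned) + clean_to);
        return (uint16_t)(clean_to - last_cleaned);
}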
624
625 uint16_t
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
627                 uint16_t nb_pkts)
628 {
629         struct ixgbe_tx_queue *txq;
630         struct ixgbe_tx_entry *sw_ring;
631         struct ixgbe_tx_entry *txe, *txn;
632         volatile union ixgbe_adv_tx_desc *txr;
633         volatile union ixgbe_adv_tx_desc *txd, *txp;
634         struct rte_mbuf     *tx_pkt;
635         struct rte_mbuf     *m_seg;
636         uint64_t buf_dma_addr;
637         uint32_t olinfo_status;
638         uint32_t cmd_type_len;
639         uint32_t pkt_len;
640         uint16_t slen;
641         uint64_t ol_flags;
642         uint16_t tx_id;
643         uint16_t tx_last;
644         uint16_t nb_tx;
645         uint16_t nb_used;
646         uint64_t tx_ol_req;
647         uint32_t ctx = 0;
648         uint32_t new_ctx;
649         union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIB_SECURITY
651         uint8_t use_ipsec;
652 #endif
653
654         tx_offload.data[0] = 0;
655         tx_offload.data[1] = 0;
656         txq = tx_queue;
657         sw_ring = txq->sw_ring;
658         txr     = txq->tx_ring;
659         tx_id   = txq->tx_tail;
660         txe = &sw_ring[tx_id];
661         txp = NULL;
662
663         /* Determine if the descriptor ring needs to be cleaned. */
664         if (txq->nb_tx_free < txq->tx_free_thresh)
665                 ixgbe_xmit_cleanup(txq);
666
667         rte_prefetch0(&txe->mbuf->pool);
668
669         /* TX loop */
670         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
671                 new_ctx = 0;
672                 tx_pkt = *tx_pkts++;
673                 pkt_len = tx_pkt->pkt_len;
674
675                 /*
676                  * Determine how many (if any) context descriptors
677                  * are needed for offload functionality.
678                  */
679                 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIB_SECURITY
681                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
682 #endif
683
684                 /* If hardware offload required */
685                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
686                 if (tx_ol_req) {
687                         tx_offload.l2_len = tx_pkt->l2_len;
688                         tx_offload.l3_len = tx_pkt->l3_len;
689                         tx_offload.l4_len = tx_pkt->l4_len;
690                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
691                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
692                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIB_SECURITY
695                         if (use_ipsec) {
696                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697                                         (union ixgbe_crypto_tx_desc_md *)
698                                                 rte_security_dynfield(tx_pkt);
699                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
701                         }
702 #endif
703
704                         /* Decide whether a new context must be built or an existing one reused. */
705                         ctx = what_advctx_update(txq, tx_ol_req,
706                                 tx_offload);
707                         /* Only allocate a context descriptor if required */
708                         new_ctx = (ctx == IXGBE_CTX_NUM);
709                         ctx = txq->ctx_curr;
710                 }
711
712                 /*
713                  * Keep track of how many descriptors are used this loop.
714                  * This will always be the number of segments + the number of
715                  * Context descriptors required to transmit the packet
716                  */
717                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
718
719                 if (txp != NULL &&
720                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721                         /* set RS on the previous packet in the burst */
722                         txp->read.cmd_type_len |=
723                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
724
725                 /*
726                  * The number of descriptors that must be allocated for a
727                  * packet is the number of segments of that packet, plus 1
728                  * Context Descriptor for the hardware offload, if any.
729                  * Determine the last TX descriptor to allocate in the TX ring
730                  * for the packet, starting from the current position (tx_id)
731                  * in the ring.
732                  */
733                 tx_last = (uint16_t) (tx_id + nb_used - 1);
734
735                 /* Circular ring */
736                 if (tx_last >= txq->nb_tx_desc)
737                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
738
739                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740                            " tx_first=%u tx_last=%u",
741                            (unsigned) txq->port_id,
742                            (unsigned) txq->queue_id,
743                            (unsigned) pkt_len,
744                            (unsigned) tx_id,
745                            (unsigned) tx_last);
746
747                 /*
748                  * Make sure there are enough TX descriptors available to
749                  * transmit the entire packet.
750                  * nb_used better be less than or equal to txq->tx_rs_thresh
751                  */
752                 if (nb_used > txq->nb_tx_free) {
753                         PMD_TX_FREE_LOG(DEBUG,
754                                         "Not enough free TX descriptors "
755                                         "nb_used=%4u nb_free=%4u "
756                                         "(port=%d queue=%d)",
757                                         nb_used, txq->nb_tx_free,
758                                         txq->port_id, txq->queue_id);
759
760                         if (ixgbe_xmit_cleanup(txq) != 0) {
761                                 /* Could not clean any descriptors */
762                                 if (nb_tx == 0)
763                                         return 0;
764                                 goto end_of_tx;
765                         }
766
767                         /* nb_used better be <= txq->tx_rs_thresh */
768                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
769                                 PMD_TX_FREE_LOG(DEBUG,
770                                         "The number of descriptors needed to "
771                                         "transmit the packet exceeds the "
772                                         "RS bit threshold. This will impact "
773                                         "performance. "
774                                         "nb_used=%4u nb_free=%4u "
775                                         "tx_rs_thresh=%4u. "
776                                         "(port=%d queue=%d)",
777                                         nb_used, txq->nb_tx_free,
778                                         txq->tx_rs_thresh,
779                                         txq->port_id, txq->queue_id);
780                                 /*
781                                  * Loop here until there are enough TX
782                                  * descriptors or until the ring cannot be
783                                  * cleaned.
784                                  */
785                                 while (nb_used > txq->nb_tx_free) {
786                                         if (ixgbe_xmit_cleanup(txq) != 0) {
787                                                 /*
788                                                  * Could not clean any
789                                                  * descriptors
790                                                  */
791                                                 if (nb_tx == 0)
792                                                         return 0;
793                                                 goto end_of_tx;
794                                         }
795                                 }
796                         }
797                 }
798
799                 /*
800                  * By now there are enough free TX descriptors to transmit
801                  * the packet.
802                  */
803
804                 /*
805                  * Set common flags of all TX Data Descriptors.
806                  *
807                  * The following bits must be set in all Data Descriptors:
808                  *   - IXGBE_ADVTXD_DTYP_DATA
809                  *   - IXGBE_ADVTXD_DCMD_DEXT
810                  *
811                  * The following bits must be set in the first Data Descriptor
812                  * and are ignored in the other ones:
813                  *   - IXGBE_ADVTXD_DCMD_IFCS
814                  *   - IXGBE_ADVTXD_MAC_1588
815                  *   - IXGBE_ADVTXD_DCMD_VLE
816                  *
817                  * The following bits must only be set in the last Data
818                  * Descriptor:
819                  *   - IXGBE_TXD_CMD_EOP
820                  *
821                  * The following bits can be set in any Data Descriptor, but
822                  * are only set in the last Data Descriptor:
823                  *   - IXGBE_TXD_CMD_RS
824                  */
825                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
827
828 #ifdef RTE_LIBRTE_IEEE1588
829                 if (ol_flags & PKT_TX_IEEE1588_TMST)
830                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
831 #endif
832
833                 olinfo_status = 0;
834                 if (tx_ol_req) {
835
836                         if (ol_flags & PKT_TX_TCP_SEG) {
837                                 /* when TSO is on, paylen in the descriptor is
838                                  * not the packet len but the tcp payload len */
839                                 pkt_len -= (tx_offload.l2_len +
840                                         tx_offload.l3_len + tx_offload.l4_len);
841                         }
842
843                         /*
844                          * Setup the TX Advanced Context Descriptor if required
845                          */
846                         if (new_ctx) {
847                                 volatile struct ixgbe_adv_tx_context_desc *
848                                     ctx_txd;
849
850                                 ctx_txd = (volatile struct
851                                     ixgbe_adv_tx_context_desc *)
852                                     &txr[tx_id];
853
854                                 txn = &sw_ring[txe->next_id];
855                                 rte_prefetch0(&txn->mbuf->pool);
856
857                                 if (txe->mbuf != NULL) {
858                                         rte_pktmbuf_free_seg(txe->mbuf);
859                                         txe->mbuf = NULL;
860                                 }
861
862                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
863                                         tx_offload,
864                                         rte_security_dynfield(tx_pkt));
865
866                                 txe->last_id = tx_last;
867                                 tx_id = txe->next_id;
868                                 txe = txn;
869                         }
870
871                         /*
872                          * Set up the TX Advanced Data Descriptor.
873                          * This path is taken whether a new context
874                          * descriptor was built or an existing one is reused.
875                          */
876                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
877                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
878                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
879                 }
880
881                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
882 #ifdef RTE_LIB_SECURITY
883                 if (use_ipsec)
884                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
885 #endif
886
887                 m_seg = tx_pkt;
888                 do {
889                         txd = &txr[tx_id];
890                         txn = &sw_ring[txe->next_id];
891                         rte_prefetch0(&txn->mbuf->pool);
892
893                         if (txe->mbuf != NULL)
894                                 rte_pktmbuf_free_seg(txe->mbuf);
895                         txe->mbuf = m_seg;
896
897                         /*
898                          * Set up Transmit Data Descriptor.
899                          */
900                         slen = m_seg->data_len;
901                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
902                         txd->read.buffer_addr =
903                                 rte_cpu_to_le_64(buf_dma_addr);
904                         txd->read.cmd_type_len =
905                                 rte_cpu_to_le_32(cmd_type_len | slen);
906                         txd->read.olinfo_status =
907                                 rte_cpu_to_le_32(olinfo_status);
908                         txe->last_id = tx_last;
909                         tx_id = txe->next_id;
910                         txe = txn;
911                         m_seg = m_seg->next;
912                 } while (m_seg != NULL);
913
914                 /*
915                  * The last packet data descriptor needs End Of Packet (EOP)
916                  */
917                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
918                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
919                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
920
921                 /* Set RS bit only on threshold packets' last descriptor */
922                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
923                         PMD_TX_FREE_LOG(DEBUG,
924                                         "Setting RS bit on TXD id="
925                                         "%4u (port=%d queue=%d)",
926                                         tx_last, txq->port_id, txq->queue_id);
927
928                         cmd_type_len |= IXGBE_TXD_CMD_RS;
929
930                         /* Update txq RS bit counters */
931                         txq->nb_tx_used = 0;
932                         txp = NULL;
933                 } else
934                         txp = txd;
935
936                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
937         }
938
939 end_of_tx:
940         /* set RS on last packet in the burst */
941         if (txp != NULL)
942                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
943
944         rte_wmb();
945
946         /*
947          * Set the Transmit Descriptor Tail (TDT)
948          */
949         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
950                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
951                    (unsigned) tx_id, (unsigned) nb_tx);
952         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
953         txq->tx_tail = tx_id;
954
955         return nb_tx;
956 }
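
/*
 * Illustrative sketch (not part of the driver): the minimal per-mbuf setup
 * an application performs so that the offload path above emits a context
 * descriptor for IPv4 header and TCP checksum offload.  It assumes an
 * untagged Ethernet/IPv4/TCP frame is already present in the mbuf; the
 * function name is hypothetical.
 */
static inline void
example_request_ipv4_tcp_cksum(struct rte_mbuf *m)
{
        m->l2_len = sizeof(struct rte_ether_hdr);
        m->l3_len = sizeof(struct rte_ipv4_hdr);
        /* the L4 pseudo-header checksum is filled in later by
         * rte_eth_tx_prepare() -> ixgbe_prep_pkts() below */
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
}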
957
958 /*********************************************************************
959  *
960  *  TX prep functions
961  *
962  **********************************************************************/
963 uint16_t
964 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
965 {
966         int i, ret;
967         uint64_t ol_flags;
968         struct rte_mbuf *m;
969         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
970
971         for (i = 0; i < nb_pkts; i++) {
972                 m = tx_pkts[i];
973                 ol_flags = m->ol_flags;
974
975                 /**
976                  * Check if the packet meets the requirements on the number of segments
977                  *
978                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
979                  *       non-TSO
980                  */
981
982                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
983                         rte_errno = EINVAL;
984                         return i;
985                 }
986
987                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
988                         rte_errno = ENOTSUP;
989                         return i;
990                 }
991
992                 /* check the size of packet */
993                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
994                         rte_errno = EINVAL;
995                         return i;
996                 }
997
998 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
999                 ret = rte_validate_tx_offload(m);
1000                 if (ret != 0) {
1001                         rte_errno = -ret;
1002                         return i;
1003                 }
1004 #endif
1005                 ret = rte_net_intel_cksum_prepare(m);
1006                 if (ret != 0) {
1007                         rte_errno = -ret;
1008                         return i;
1009                 }
1010         }
1011
1012         return i;
1013 }
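
/*
 * Illustrative sketch (not part of the driver): ixgbe_prep_pkts() is reached
 * through rte_eth_tx_prepare(), which applications call before
 * rte_eth_tx_burst() when requesting TSO or checksum offloads.  Port/queue
 * ids and the function name are hypothetical.
 */
static inline uint16_t
example_prepare_and_send(uint16_t port_id, uint16_t queue_id,
                         struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
                                              pkts, nb_pkts);

        /* packets from index nb_prep onward were rejected; rte_errno holds
         * the reason for the first failure */
        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}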
1014
1015 /*********************************************************************
1016  *
1017  *  RX functions
1018  *
1019  **********************************************************************/
1020
1021 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1022 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1023 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1024 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1025 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1030 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1031 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1032 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1033 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1037 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1054
1055 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1078
1079 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1102
1103 /**
1104  * Use two different tables for normal packets and tunnel packets
1105  * to save space.
1106  */
1107 const uint32_t
1108         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1109         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1110         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4,
1112         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1114         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1116         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1124         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1126         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6,
1128         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1130         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1132         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1140         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1151         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1163         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1175         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1187         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1188                 RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1191 };
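
/*
 * Illustrative sketch (not part of the driver): once the RX routines have
 * extracted the packet-type bits from the descriptor and masked them down to
 * the table's index range, resolving the mbuf packet type is a single array
 * lookup.  The index is assumed to be pre-masked here; the function name is
 * hypothetical.
 */
static inline uint32_t
example_ptype_lookup(uint32_t masked_pkt_info)
{
        if (masked_pkt_info >= IXGBE_PACKET_TYPE_MAX)
                return RTE_PTYPE_UNKNOWN;
        return ptype_table[masked_pkt_info];
}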
1192
1193 const uint32_t
1194         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1195         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1216         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1217                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1219                 RTE_PTYPE_INNER_L4_TCP,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1223                 RTE_PTYPE_INNER_L4_TCP,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1230                 RTE_PTYPE_INNER_L4_TCP,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1232                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1233                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1234                 RTE_PTYPE_INNER_L3_IPV4,
1235         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1236                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1238                 RTE_PTYPE_INNER_L4_UDP,
1239         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1242                 RTE_PTYPE_INNER_L4_UDP,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1246                 RTE_PTYPE_INNER_L4_SCTP,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1253                 RTE_PTYPE_INNER_L4_UDP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1255                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1257                 RTE_PTYPE_INNER_L4_SCTP,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1259                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1260                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1261                 RTE_PTYPE_INNER_L3_IPV4,
1262         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1263                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1265                 RTE_PTYPE_INNER_L4_SCTP,
1266         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                 RTE_PTYPE_INNER_L4_SCTP,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1273                 RTE_PTYPE_INNER_L4_TCP,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1277                 RTE_PTYPE_INNER_L4_UDP,
1278
1279         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV6,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV4,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV4,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1323                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1324                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1325                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1351                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1352                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1353                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1370 };
1371
1372 int
1373 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1374 {
1375         volatile union ixgbe_adv_rx_desc *rxdp;
1376         struct ixgbe_rx_queue *rxq = rx_queue;
1377         uint16_t desc;
1378
1379         desc = rxq->rx_tail;
1380         rxdp = &rxq->rx_ring[desc];
1381         /* watch for changes in status bit */
1382         pmc->addr = &rxdp->wb.upper.status_error;
1383
1384         /*
1385          * we expect the DD bit to be set to 1 if this descriptor was already
1386          * written to.
1387          */
1388         pmc->val = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1389         pmc->mask = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1390
1391         /* the monitored status field is 32-bit */
1392         pmc->size = sizeof(uint32_t);
1393
1394         return 0;
1395 }
1396
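/*
 * Illustrative usage sketch (not part of this driver): applications normally
 * reach the callback above through the generic ethdev power-management API.
 * Assuming the rte_eth_get_monitor_addr()/rte_power_monitor() pair from
 * <rte_ethdev.h> and <rte_power_intrinsics.h>, a polling core could sleep on
 * the DD bit roughly as follows; the ~1 ms wake-up deadline is an arbitrary
 * example value:
 *
 *      struct rte_power_monitor_cond pmc;
 *
 *      if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
 *              // wake on descriptor write-back or after roughly 1 ms
 *              rte_power_monitor(&pmc, rte_rdtsc() + rte_get_tsc_hz() / 1000);
 */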
1397 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1398 static inline uint32_t
1399 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1400 {
1401
1402         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1403                 return RTE_PTYPE_UNKNOWN;
1404
1405         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1406
1407         /* For tunnel packet */
1408         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1409                 /* Remove the tunnel bit to save table space. */
1410                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1411                 return ptype_table_tn[pkt_info];
1412         }
1413
1414         /**
1415          * For x550, if it's not a tunnel packet,
1416          * the tunnel type bits should be set to 0.
1417          * Reuse the 82599 mask.
1418          */
1419         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1420
1421         return ptype_table[pkt_info];
1422 }
1423
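/*
 * Illustrative sketch (not part of this driver): the value built above is
 * stored in mbuf->packet_type and can be decomposed with the generic
 * RTE_PTYPE_* masks from <rte_mbuf_ptype.h>, e.g. to spot TCP carried inside
 * a GRE/NVGRE tunnel as classified by ptype_table_tn[]:
 *
 *      uint32_t pt = mb->packet_type;
 *
 *      if ((pt & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRE &&
 *          (pt & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP) {
 *              // inner TCP segment of a GRE-tunnelled packet
 *      }
 */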
1424 static inline uint64_t
1425 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1426 {
1427         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1428                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1429                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1430                 PKT_RX_RSS_HASH, 0, 0, 0,
1431                 0, 0, 0,  PKT_RX_FDIR,
1432         };
1433 #ifdef RTE_LIBRTE_IEEE1588
1434         static uint64_t ip_pkt_etqf_map[8] = {
1435                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1436                 0, 0, 0, 0,
1437         };
1438
1439         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1440                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1441                                 ip_rss_types_map[pkt_info & 0XF];
1442         else
1443                 return ip_rss_types_map[pkt_info & 0XF];
1444 #else
1445         return ip_rss_types_map[pkt_info & 0XF];
1446 #endif
1447 }
1448
1449 static inline uint64_t
1450 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1451 {
1452         uint64_t pkt_flags;
1453
1454         /*
1455          * Check only whether a VLAN tag is present.
1456          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1457          * that can be found from the rte_eth_rxmode.offloads flags.
1458          */
1459         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1460
1461 #ifdef RTE_LIBRTE_IEEE1588
1462         if (rx_status & IXGBE_RXD_STAT_TMST)
1463                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1464 #endif
1465         return pkt_flags;
1466 }
1467
1468 static inline uint64_t
1469 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1470 {
1471         uint64_t pkt_flags;
1472
1473         /*
1474          * Bit 31: IPE, IPv4 checksum error
1475          * Bit 30: L4I, L4 integrity error
1476          */
1477         static uint64_t error_to_pkt_flags_map[4] = {
1478                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1479                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1480                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1481                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1482         };
1483         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1484                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1485
1486         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1487             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1488                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1489         }
1490
1491 #ifdef RTE_LIB_SECURITY
1492         if (rx_status & IXGBE_RXD_STAT_SECP) {
1493                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1494                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1495                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1496         }
1497 #endif
1498
1499         return pkt_flags;
1500 }
1501
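/*
 * Illustrative sketch (not part of this driver): applications consume the
 * flags produced above via mbuf->ol_flags using the generic checksum masks
 * from <rte_mbuf_core.h>, for example dropping packets with bad checksums:
 *
 *      if ((m->ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD ||
 *          (m->ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD) {
 *              rte_pktmbuf_free(m);
 *              continue;
 *      }
 */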
1502 /*
1503  * LOOK_AHEAD defines how many desc statuses to check beyond the
1504  * current descriptor.
1505  * It must be a compile-time constant (#define) for optimal performance.
1506  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1507  * function only works with LOOK_AHEAD=8.
1508  */
1509 #define LOOK_AHEAD 8
1510 #if (LOOK_AHEAD != 8)
1511 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1512 #endif
1513 static inline int
1514 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1515 {
1516         volatile union ixgbe_adv_rx_desc *rxdp;
1517         struct ixgbe_rx_entry *rxep;
1518         struct rte_mbuf *mb;
1519         uint16_t pkt_len;
1520         uint64_t pkt_flags;
1521         int nb_dd;
1522         uint32_t s[LOOK_AHEAD];
1523         uint32_t pkt_info[LOOK_AHEAD];
1524         int i, j, nb_rx = 0;
1525         uint32_t status;
1526         uint64_t vlan_flags = rxq->vlan_flags;
1527
1528         /* get references to current descriptor and S/W ring entry */
1529         rxdp = &rxq->rx_ring[rxq->rx_tail];
1530         rxep = &rxq->sw_ring[rxq->rx_tail];
1531
1532         status = rxdp->wb.upper.status_error;
1533         /* check to make sure there is at least 1 packet to receive */
1534         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1535                 return 0;
1536
1537         /*
1538          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1539          * reference packets that are ready to be received.
1540          */
1541         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1542              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1543                 /* Read desc statuses; rte_smp_rmb() below orders them before the data reads */
1544                 for (j = 0; j < LOOK_AHEAD; j++)
1545                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1546
1547                 rte_smp_rmb();
1548
1549                 /* Compute how many status bits were set */
1550                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1551                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1552                         ;
1553
1554                 for (j = 0; j < nb_dd; j++)
1555                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1556                                                        lo_dword.data);
1557
1558                 nb_rx += nb_dd;
1559
1560                 /* Translate descriptor info to mbuf format */
1561                 for (j = 0; j < nb_dd; ++j) {
1562                         mb = rxep[j].mbuf;
1563                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1564                                   rxq->crc_len;
1565                         mb->data_len = pkt_len;
1566                         mb->pkt_len = pkt_len;
1567                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1568
1569                         /* convert descriptor fields to rte mbuf flags */
1570                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1571                                 vlan_flags);
1572                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1573                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1574                                         ((uint16_t)pkt_info[j]);
1575                         mb->ol_flags = pkt_flags;
1576                         mb->packet_type =
1577                                 ixgbe_rxd_pkt_info_to_pkt_type
1578                                         (pkt_info[j], rxq->pkt_type_mask);
1579
1580                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1581                                 mb->hash.rss = rte_le_to_cpu_32(
1582                                     rxdp[j].wb.lower.hi_dword.rss);
1583                         else if (pkt_flags & PKT_RX_FDIR) {
1584                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1585                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1586                                     IXGBE_ATR_HASH_MASK;
1587                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1588                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1589                         }
1590                 }
1591
1592                 /* Move mbuf pointers from the S/W ring to the stage */
1593                 for (j = 0; j < LOOK_AHEAD; ++j) {
1594                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1595                 }
1596
1597                 /* stop if fewer than LOOK_AHEAD descriptors were ready */
1598                 if (nb_dd != LOOK_AHEAD)
1599                         break;
1600         }
1601
1602         /* clear software ring entries so we can cleanup correctly */
1603         for (i = 0; i < nb_rx; ++i) {
1604                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1605         }
1606
1607
1608         return nb_rx;
1609 }
1610
1611 static inline int
1612 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1613 {
1614         volatile union ixgbe_adv_rx_desc *rxdp;
1615         struct ixgbe_rx_entry *rxep;
1616         struct rte_mbuf *mb;
1617         uint16_t alloc_idx;
1618         __le64 dma_addr;
1619         int diag, i;
1620
1621         /* allocate buffers in bulk directly into the S/W ring */
1622         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1623         rxep = &rxq->sw_ring[alloc_idx];
1624         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1625                                     rxq->rx_free_thresh);
1626         if (unlikely(diag != 0))
1627                 return -ENOMEM;
1628
1629         rxdp = &rxq->rx_ring[alloc_idx];
1630         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1631                 /* populate the static rte mbuf fields */
1632                 mb = rxep[i].mbuf;
1633                 if (reset_mbuf) {
1634                         mb->port = rxq->port_id;
1635                 }
1636
1637                 rte_mbuf_refcnt_set(mb, 1);
1638                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1639
1640                 /* populate the descriptors */
1641                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1642                 rxdp[i].read.hdr_addr = 0;
1643                 rxdp[i].read.pkt_addr = dma_addr;
1644         }
1645
1646         /* update state of internal queue structure */
1647         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1648         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1649                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1650
1651         /* no errors */
1652         return 0;
1653 }
1654
1655 static inline uint16_t
1656 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1657                          uint16_t nb_pkts)
1658 {
1659         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1660         int i;
1661
1662         /* how many packets are ready to return? */
1663         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1664
1665         /* copy mbuf pointers to the application's packet list */
1666         for (i = 0; i < nb_pkts; ++i)
1667                 rx_pkts[i] = stage[i];
1668
1669         /* update internal queue state */
1670         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1671         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1672
1673         return nb_pkts;
1674 }
1675
1676 static inline uint16_t
1677 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1678              uint16_t nb_pkts)
1679 {
1680         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1681         uint16_t nb_rx = 0;
1682
1683         /* Any previously recv'd pkts will be returned from the Rx stage */
1684         if (rxq->rx_nb_avail)
1685                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1686
1687         /* Scan the H/W ring for packets to receive */
1688         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1689
1690         /* update internal queue state */
1691         rxq->rx_next_avail = 0;
1692         rxq->rx_nb_avail = nb_rx;
1693         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1694
1695         /* if required, allocate new buffers to replenish descriptors */
1696         if (rxq->rx_tail > rxq->rx_free_trigger) {
1697                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1698
1699                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1700                         int i, j;
1701
1702                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1703                                    "queue_id=%u", (unsigned) rxq->port_id,
1704                                    (unsigned) rxq->queue_id);
1705
1706                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1707                                 rxq->rx_free_thresh;
1708
1709                         /*
1710                          * Need to rewind any previous receives if we cannot
1711                          * allocate new buffers to replenish the old ones.
1712                          */
1713                         rxq->rx_nb_avail = 0;
1714                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1715                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1716                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1717
1718                         return 0;
1719                 }
1720
1721                 /* update tail pointer */
1722                 rte_wmb();
1723                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1724                                             cur_free_trigger);
1725         }
1726
1727         if (rxq->rx_tail >= rxq->nb_rx_desc)
1728                 rxq->rx_tail = 0;
1729
1730         /* received any packets this loop? */
1731         if (rxq->rx_nb_avail)
1732                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1733
1734         return 0;
1735 }
1736
1737 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1738 uint16_t
1739 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1740                            uint16_t nb_pkts)
1741 {
1742         uint16_t nb_rx;
1743
1744         if (unlikely(nb_pkts == 0))
1745                 return 0;
1746
1747         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1748                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1749
1750         /* request is relatively large, chunk it up */
1751         nb_rx = 0;
1752         while (nb_pkts) {
1753                 uint16_t ret, n;
1754
1755                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1756                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1757                 nb_rx = (uint16_t)(nb_rx + ret);
1758                 nb_pkts = (uint16_t)(nb_pkts - ret);
1759                 if (ret < n)
1760                         break;
1761         }
1762
1763         return nb_rx;
1764 }
1765
1766 uint16_t
1767 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1768                 uint16_t nb_pkts)
1769 {
1770         struct ixgbe_rx_queue *rxq;
1771         volatile union ixgbe_adv_rx_desc *rx_ring;
1772         volatile union ixgbe_adv_rx_desc *rxdp;
1773         struct ixgbe_rx_entry *sw_ring;
1774         struct ixgbe_rx_entry *rxe;
1775         struct rte_mbuf *rxm;
1776         struct rte_mbuf *nmb;
1777         union ixgbe_adv_rx_desc rxd;
1778         uint64_t dma_addr;
1779         uint32_t staterr;
1780         uint32_t pkt_info;
1781         uint16_t pkt_len;
1782         uint16_t rx_id;
1783         uint16_t nb_rx;
1784         uint16_t nb_hold;
1785         uint64_t pkt_flags;
1786         uint64_t vlan_flags;
1787
1788         nb_rx = 0;
1789         nb_hold = 0;
1790         rxq = rx_queue;
1791         rx_id = rxq->rx_tail;
1792         rx_ring = rxq->rx_ring;
1793         sw_ring = rxq->sw_ring;
1794         vlan_flags = rxq->vlan_flags;
1795         while (nb_rx < nb_pkts) {
1796                 /*
1797                  * The order of operations here is important as the DD status
1798                  * bit must not be read after any other descriptor fields.
1799                  * rx_ring and rxdp are pointing to volatile data so the order
1800                  * of accesses cannot be reordered by the compiler. If they were
1801                  * not volatile, they could be reordered which could lead to
1802                  * using invalid descriptor fields when read from rxd.
1803                  */
1804                 rxdp = &rx_ring[rx_id];
1805                 staterr = rxdp->wb.upper.status_error;
1806                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1807                         break;
1808                 rxd = *rxdp;
1809
1810                 /*
1811                  * End of packet.
1812                  *
1813                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1814                  * is likely to be invalid and to be dropped by the various
1815                  * validation checks performed by the network stack.
1816                  *
1817                  * Allocate a new mbuf to replenish the RX ring descriptor.
1818                  * If the allocation fails:
1819                  *    - arrange for that RX descriptor to be the first one
1820                  *      being parsed the next time the receive function is
1821                  *      invoked [on the same queue].
1822                  *
1823                  *    - Stop parsing the RX ring and return immediately.
1824                  *
1825                  * This policy does not drop the packet received in the RX
1826                  * descriptor for which the allocation of a new mbuf failed.
1827                  * Thus, it allows that packet to be retrieved later, once
1828                  * mbufs have been freed in the meantime.
1829                  * As a side effect, holding RX descriptors instead of
1830                  * systematically giving them back to the NIC may lead to
1831                  * RX ring exhaustion situations.
1832                  * However, the NIC can gracefully prevent such situations
1833                  * from happening by sending specific "back-pressure" flow
1834                  * control frames to its peer(s).
1835                  */
1836                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1837                            "ext_err_stat=0x%08x pkt_len=%u",
1838                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1839                            (unsigned) rx_id, (unsigned) staterr,
1840                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1841
1842                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1843                 if (nmb == NULL) {
1844                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1845                                    "queue_id=%u", (unsigned) rxq->port_id,
1846                                    (unsigned) rxq->queue_id);
1847                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1848                         break;
1849                 }
1850
1851                 nb_hold++;
1852                 rxe = &sw_ring[rx_id];
1853                 rx_id++;
1854                 if (rx_id == rxq->nb_rx_desc)
1855                         rx_id = 0;
1856
1857                 /* Prefetch next mbuf while processing current one. */
1858                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1859
1860                 /*
1861                  * When next RX descriptor is on a cache-line boundary,
1862                  * prefetch the next 4 RX descriptors and the next 8 pointers
1863                  * to mbufs.
1864                  */
1865                 if ((rx_id & 0x3) == 0) {
1866                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1867                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1868                 }
1869
1870                 rxm = rxe->mbuf;
1871                 rxe->mbuf = nmb;
1872                 dma_addr =
1873                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1874                 rxdp->read.hdr_addr = 0;
1875                 rxdp->read.pkt_addr = dma_addr;
1876
1877                 /*
1878                  * Initialize the returned mbuf.
1879                  * 1) setup generic mbuf fields:
1880                  *    - number of segments,
1881                  *    - next segment,
1882                  *    - packet length,
1883                  *    - RX port identifier.
1884                  * 2) integrate hardware offload data, if any:
1885                  *    - RSS flag & hash,
1886                  *    - IP checksum flag,
1887                  *    - VLAN TCI, if any,
1888                  *    - error flags.
1889                  */
1890                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1891                                       rxq->crc_len);
1892                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1893                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1894                 rxm->nb_segs = 1;
1895                 rxm->next = NULL;
1896                 rxm->pkt_len = pkt_len;
1897                 rxm->data_len = pkt_len;
1898                 rxm->port = rxq->port_id;
1899
1900                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1901                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1902                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1903
1904                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1905                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1906                 pkt_flags = pkt_flags |
1907                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1908                 rxm->ol_flags = pkt_flags;
1909                 rxm->packet_type =
1910                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1911                                                        rxq->pkt_type_mask);
1912
1913                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1914                         rxm->hash.rss = rte_le_to_cpu_32(
1915                                                 rxd.wb.lower.hi_dword.rss);
1916                 else if (pkt_flags & PKT_RX_FDIR) {
1917                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1918                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1919                                         IXGBE_ATR_HASH_MASK;
1920                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1921                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1922                 }
1923                 /*
1924                  * Store the mbuf address into the next entry of the array
1925                  * of returned packets.
1926                  */
1927                 rx_pkts[nb_rx++] = rxm;
1928         }
1929         rxq->rx_tail = rx_id;
1930
1931         /*
1932          * If the number of free RX descriptors is greater than the RX free
1933          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1934          * register.
1935          * Update the RDT with the value of the last processed RX descriptor
1936          * minus 1, to guarantee that the RDT register is never equal to the
1937          * RDH register, which creates a "full" ring situation from the
1938          * hardware point of view...
1939          */
1940         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1941         if (nb_hold > rxq->rx_free_thresh) {
1942                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1943                            "nb_hold=%u nb_rx=%u",
1944                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1945                            (unsigned) rx_id, (unsigned) nb_hold,
1946                            (unsigned) nb_rx);
1947                 rx_id = (uint16_t) ((rx_id == 0) ?
1948                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1949                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1950                 nb_hold = 0;
1951         }
1952         rxq->nb_rx_hold = nb_hold;
1953         return nb_rx;
1954 }
1955
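/*
 * Illustrative sketch (not part of this driver): ixgbe_recv_pkts() is one of
 * the candidates installed as dev->rx_pkt_burst, so applications reach it
 * through the generic burst API; process() below is a placeholder for
 * application logic:
 *
 *      struct rte_mbuf *pkts[32];
 *      uint16_t i, nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 *      for (i = 0; i < nb; i++) {
 *              // ol_flags, hash and vlan_tci were filled in above
 *              process(pkts[i]);
 *              rte_pktmbuf_free(pkts[i]);
 *      }
 */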
1956 /**
1957  * Detect an RSC descriptor.
1958  */
1959 static inline uint32_t
1960 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1961 {
1962         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1963                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1964 }
1965
1966 /**
1967  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1968  *
1969  * Fill the following info in the HEAD buffer of the Rx cluster:
1970  *    - RX port identifier
1971  *    - hardware offload data, if any:
1972  *      - RSS flag & hash
1973  *      - IP checksum flag
1974  *      - VLAN TCI, if any
1975  *      - error flags
1976  * @head HEAD of the packet cluster
1977  * @desc HW descriptor to get data from
1978  * @rxq Pointer to the Rx queue
1979  */
1980 static inline void
1981 ixgbe_fill_cluster_head_buf(
1982         struct rte_mbuf *head,
1983         union ixgbe_adv_rx_desc *desc,
1984         struct ixgbe_rx_queue *rxq,
1985         uint32_t staterr)
1986 {
1987         uint32_t pkt_info;
1988         uint64_t pkt_flags;
1989
1990         head->port = rxq->port_id;
1991
1992         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1993          * set in the pkt_flags field.
1994          */
1995         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1996         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1997         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1998         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1999         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2000         head->ol_flags = pkt_flags;
2001         head->packet_type =
2002                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2003
2004         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2005                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2006         else if (pkt_flags & PKT_RX_FDIR) {
2007                 head->hash.fdir.hash =
2008                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2009                                                           & IXGBE_ATR_HASH_MASK;
2010                 head->hash.fdir.id =
2011                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2012         }
2013 }
2014
2015 /**
2016  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2017  *
2018  * @rx_queue Rx queue handle
2019  * @rx_pkts table of received packets
2020  * @nb_pkts size of rx_pkts table
2021  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2022  *
2023  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2024  * additional ring of ixgbe_scattered_rx_entry elements that holds the relevant RSC info.
2025  *
2026  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2027  * 1) When non-EOP RSC completion arrives:
2028  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2029  *       segment's data length.
2030  *    b) Set the "next" pointer of the current segment to point to the segment
2031  *       at the NEXTP index.
2032  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2033  *       in the sw_sc_ring.
2034  * 2) When EOP arrives we just update the cluster's total length and offload
2035  *    flags and deliver the cluster up to the upper layers. In our case - put it
2036  *    in the rx_pkts table.
2037  *
2038  * Returns the number of received packets/clusters (according to the "bulk
2039  * receive" interface).
2040  */
2041 static inline uint16_t
2042 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2043                     bool bulk_alloc)
2044 {
2045         struct ixgbe_rx_queue *rxq = rx_queue;
2046         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2047         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2048         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2049         uint16_t rx_id = rxq->rx_tail;
2050         uint16_t nb_rx = 0;
2051         uint16_t nb_hold = rxq->nb_rx_hold;
2052         uint16_t prev_id = rxq->rx_tail;
2053
2054         while (nb_rx < nb_pkts) {
2055                 bool eop;
2056                 struct ixgbe_rx_entry *rxe;
2057                 struct ixgbe_scattered_rx_entry *sc_entry;
2058                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2059                 struct ixgbe_rx_entry *next_rxe = NULL;
2060                 struct rte_mbuf *first_seg;
2061                 struct rte_mbuf *rxm;
2062                 struct rte_mbuf *nmb = NULL;
2063                 union ixgbe_adv_rx_desc rxd;
2064                 uint16_t data_len;
2065                 uint16_t next_id;
2066                 volatile union ixgbe_adv_rx_desc *rxdp;
2067                 uint32_t staterr;
2068
2069 next_desc:
2070                 /*
2071                  * The code in this whole file uses the volatile pointer to
2072                  * ensure the read ordering of the status and the rest of the
2073                  * descriptor fields (on the compiler level only!!!). This is so
2074                  * UGLY - why not just use the compiler barrier instead? DPDK
2075                  * even has the rte_compiler_barrier() for that.
2076                  *
2077                  * But most importantly this is just wrong because this doesn't
2078                  * ensure memory ordering in a general case at all. For
2079                  * instance, DPDK is supposed to work on Power CPUs where
2080                  * compiler barrier may just not be enough!
2081                  *
2082                  * I tried to write only this function properly to have a
2083                  * starting point (as a part of an LRO/RSC series) but the
2084                  * compiler cursed at me when I tried to cast away the
2085                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2086                  * keeping it the way it is for now.
2087                  *
2088                  * The code in this file is broken in so many other places and
2089                  * will just not work on a big endian CPU anyway, therefore the
2090                  * lines below will have to be revisited together with the rest
2091                  * of the ixgbe PMD.
2092                  *
2093                  * TODO:
2094                  *    - Get rid of "volatile" and let the compiler do its job.
2095                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2096                  *      memory ordering below.
2097                  */
2098                 rxdp = &rx_ring[rx_id];
2099                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2100
2101                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2102                         break;
2103
2104                 rxd = *rxdp;
2105
2106                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2107                                   "staterr=0x%x data_len=%u",
2108                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2109                            rte_le_to_cpu_16(rxd.wb.upper.length));
2110
2111                 if (!bulk_alloc) {
2112                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2113                         if (nmb == NULL) {
2114                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2115                                                   "port_id=%u queue_id=%u",
2116                                            rxq->port_id, rxq->queue_id);
2117
2118                                 rte_eth_devices[rxq->port_id].data->
2119                                                         rx_mbuf_alloc_failed++;
2120                                 break;
2121                         }
2122                 } else if (nb_hold > rxq->rx_free_thresh) {
2123                         uint16_t next_rdt = rxq->rx_free_trigger;
2124
2125                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2126                                 rte_wmb();
2127                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2128                                                         rxq->rdt_reg_addr,
2129                                                         next_rdt);
2130                                 nb_hold -= rxq->rx_free_thresh;
2131                         } else {
2132                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2133                                                   "port_id=%u queue_id=%u",
2134                                            rxq->port_id, rxq->queue_id);
2135
2136                                 rte_eth_devices[rxq->port_id].data->
2137                                                         rx_mbuf_alloc_failed++;
2138                                 break;
2139                         }
2140                 }
2141
2142                 nb_hold++;
2143                 rxe = &sw_ring[rx_id];
2144                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2145
2146                 next_id = rx_id + 1;
2147                 if (next_id == rxq->nb_rx_desc)
2148                         next_id = 0;
2149
2150                 /* Prefetch next mbuf while processing current one. */
2151                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2152
2153                 /*
2154                  * When next RX descriptor is on a cache-line boundary,
2155                  * prefetch the next 4 RX descriptors and the next 8 pointers
2156                  * to mbufs.
2157                  */
2158                 if ((next_id & 0x3) == 0) {
2159                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2160                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2161                 }
2162
2163                 rxm = rxe->mbuf;
2164
2165                 if (!bulk_alloc) {
2166                         __le64 dma =
2167                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2168                         /*
2169                          * Update RX descriptor with the physical address of the
2170                          * new data buffer of the new allocated mbuf.
2171                          * new data buffer of the newly allocated mbuf.
2172                         rxe->mbuf = nmb;
2173
2174                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2175                         rxdp->read.hdr_addr = 0;
2176                         rxdp->read.pkt_addr = dma;
2177                 } else
2178                         rxe->mbuf = NULL;
2179
2180                 /*
2181                  * Set data length & data buffer address of mbuf.
2182                  */
2183                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2184                 rxm->data_len = data_len;
2185
2186                 if (!eop) {
2187                         uint16_t nextp_id;
2188                         /*
2189                          * Get next descriptor index:
2190                          *  - For RSC it's in the NEXTP field.
2191                          *  - For a scattered packet - it's just a following
2192                          *    descriptor.
2193                          */
2194                         if (ixgbe_rsc_count(&rxd))
2195                                 nextp_id =
2196                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2197                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2198                         else
2199                                 nextp_id = next_id;
2200
2201                         next_sc_entry = &sw_sc_ring[nextp_id];
2202                         next_rxe = &sw_ring[nextp_id];
2203                         rte_ixgbe_prefetch(next_rxe);
2204                 }
2205
2206                 sc_entry = &sw_sc_ring[rx_id];
2207                 first_seg = sc_entry->fbuf;
2208                 sc_entry->fbuf = NULL;
2209
2210                 /*
2211                  * If this is the first buffer of the received packet,
2212                  * set the pointer to the first mbuf of the packet and
2213                  * initialize its context.
2214                  * Otherwise, update the total length and the number of segments
2215                  * of the current scattered packet, and update the pointer to
2216                  * the last mbuf of the current packet.
2217                  */
2218                 if (first_seg == NULL) {
2219                         first_seg = rxm;
2220                         first_seg->pkt_len = data_len;
2221                         first_seg->nb_segs = 1;
2222                 } else {
2223                         first_seg->pkt_len += data_len;
2224                         first_seg->nb_segs++;
2225                 }
2226
2227                 prev_id = rx_id;
2228                 rx_id = next_id;
2229
2230                 /*
2231                  * If this is not the last buffer of the received packet, update
2232                  * the pointer to the first mbuf at the NEXTP entry in the
2233                  * sw_sc_ring and continue to parse the RX ring.
2234                  */
2235                 if (!eop && next_rxe) {
2236                         rxm->next = next_rxe->mbuf;
2237                         next_sc_entry->fbuf = first_seg;
2238                         goto next_desc;
2239                 }
2240
2241                 /* Initialize the first mbuf of the returned packet */
2242                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2243
2244                 /*
2245                  * Deal with the case when HW CRC stripping is disabled.
2246                  * That can't happen when LRO is enabled, but it could still
2247                  * happen in scattered RX mode.
2248                  */
2249                 first_seg->pkt_len -= rxq->crc_len;
2250                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2251                         struct rte_mbuf *lp;
2252
2253                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2254                                 ;
2255
2256                         first_seg->nb_segs--;
2257                         lp->data_len -= rxq->crc_len - rxm->data_len;
2258                         lp->next = NULL;
2259                         rte_pktmbuf_free_seg(rxm);
2260                 } else
2261                         rxm->data_len -= rxq->crc_len;
2262
2263                 /* Prefetch data of first segment, if configured to do so. */
2264                 rte_packet_prefetch((char *)first_seg->buf_addr +
2265                         first_seg->data_off);
2266
2267                 /*
2268                  * Store the mbuf address into the next entry of the array
2269                  * of returned packets.
2270                  */
2271                 rx_pkts[nb_rx++] = first_seg;
2272         }
2273
2274         /*
2275          * Record index of the next RX descriptor to probe.
2276          */
2277         rxq->rx_tail = rx_id;
2278
2279         /*
2280          * If the number of free RX descriptors is greater than the RX free
2281          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2282          * register.
2283          * Update the RDT with the value of the last processed RX descriptor
2284          * minus 1, to guarantee that the RDT register is never equal to the
2285          * RDH register, which creates a "full" ring situation from the
2286          * hardware point of view...
2287          */
2288         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2289                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2290                            "nb_hold=%u nb_rx=%u",
2291                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2292
2293                 rte_wmb();
2294                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2295                 nb_hold = 0;
2296         }
2297
2298         rxq->nb_rx_hold = nb_hold;
2299         return nb_rx;
2300 }
2301
2302 uint16_t
2303 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2304                                  uint16_t nb_pkts)
2305 {
2306         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2307 }
2308
2309 uint16_t
2310 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2311                                uint16_t nb_pkts)
2312 {
2313         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2314 }
2315
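/*
 * Illustrative sketch (not part of this driver): the LRO receive paths above
 * are only selected when the application enables the TCP LRO offload at
 * configuration time; the max_lro_pkt_size value below is just an example,
 * the supported maximum is reported in rte_eth_dev_info:
 *
 *      struct rte_eth_conf conf;
 *
 *      memset(&conf, 0, sizeof(conf));
 *      conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
 *      conf.rxmode.max_lro_pkt_size = 9728;
 *      rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */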
2316 /*********************************************************************
2317  *
2318  *  Queue management functions
2319  *
2320  **********************************************************************/
2321
2322 static void __rte_cold
2323 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2324 {
2325         unsigned i;
2326
2327         if (txq->sw_ring != NULL) {
2328                 for (i = 0; i < txq->nb_tx_desc; i++) {
2329                         if (txq->sw_ring[i].mbuf != NULL) {
2330                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2331                                 txq->sw_ring[i].mbuf = NULL;
2332                         }
2333                 }
2334         }
2335 }
2336
2337 static int
2338 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2339 {
2340         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2341         uint16_t i, tx_last, tx_id;
2342         uint16_t nb_tx_free_last;
2343         uint16_t nb_tx_to_clean;
2344         uint32_t pkt_cnt;
2345
2346         /* Start freeing mbufs from the entry after tx_tail */
2347         tx_last = txq->tx_tail;
2348         tx_id  = swr_ring[tx_last].next_id;
2349
2350         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2351                 return 0;
2352
2353         nb_tx_to_clean = txq->nb_tx_free;
2354         nb_tx_free_last = txq->nb_tx_free;
2355         if (!free_cnt)
2356                 free_cnt = txq->nb_tx_desc;
2357
2358         /* Loop through swr_ring to count the number of
2359          * freeable mbufs and packets.
2360          */
2361         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2362                 for (i = 0; i < nb_tx_to_clean &&
2363                         pkt_cnt < free_cnt &&
2364                         tx_id != tx_last; i++) {
2365                         if (swr_ring[tx_id].mbuf != NULL) {
2366                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2367                                 swr_ring[tx_id].mbuf = NULL;
2368
2369                                 /*
2370                                  * last segment in the packet,
2371                                  * increment packet count
2372                                  */
2373                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2374                         }
2375
2376                         tx_id = swr_ring[tx_id].next_id;
2377                 }
2378
2379                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2380                         txq->nb_tx_free || tx_id == tx_last)
2381                         break;
2382
2383                 if (pkt_cnt < free_cnt) {
2384                         if (ixgbe_xmit_cleanup(txq))
2385                                 break;
2386
2387                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2388                         nb_tx_free_last = txq->nb_tx_free;
2389                 }
2390         }
2391
2392         return (int)pkt_cnt;
2393 }
2394
2395 static int
2396 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2397                         uint32_t free_cnt)
2398 {
2399         int i, n, cnt;
2400
2401         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2402                 free_cnt = txq->nb_tx_desc;
2403
2404         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2405
2406         for (i = 0; i < cnt; i += n) {
2407                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2408                         break;
2409
2410                 n = ixgbe_tx_free_bufs(txq);
2411
2412                 if (n == 0)
2413                         break;
2414         }
2415
2416         return i;
2417 }
2418
2419 static int
2420 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2421                         uint32_t free_cnt __rte_unused)
2422 {
2423         return -ENOTSUP;
2424 }
2425
2426 int
2427 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2428 {
2429         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2430         if (txq->offloads == 0 &&
2431 #ifdef RTE_LIB_SECURITY
2432                         !(txq->using_ipsec) &&
2433 #endif
2434                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2435                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2436                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2437                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2438                                         txq->sw_ring_v != NULL)) {
2439                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2440                 } else {
2441                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2442                 }
2443         }
2444
2445         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2446 }
2447
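/*
 * Illustrative sketch (not part of this driver): the callback above backs the
 * generic rte_eth_tx_done_cleanup() API, which an application can call to
 * reclaim already-transmitted mbufs without sending more packets;
 * handle_error() is a placeholder for application logic:
 *
 *      // free up to 64 transmitted packets on this queue
 *      int n = rte_eth_tx_done_cleanup(port_id, queue_id, 64);
 *
 *      if (n < 0)
 *              handle_error(n);        // e.g. -ENOTSUP on the vector path
 */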
2448 static void __rte_cold
2449 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2450 {
2451         if (txq != NULL &&
2452             txq->sw_ring != NULL)
2453                 rte_free(txq->sw_ring);
2454 }
2455
2456 static void __rte_cold
2457 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2458 {
2459         if (txq != NULL && txq->ops != NULL) {
2460                 txq->ops->release_mbufs(txq);
2461                 txq->ops->free_swring(txq);
2462                 rte_free(txq);
2463         }
2464 }
2465
2466 void __rte_cold
2467 ixgbe_dev_tx_queue_release(void *txq)
2468 {
2469         ixgbe_tx_queue_release(txq);
2470 }
2471
2472 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2473 static void __rte_cold
2474 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2475 {
2476         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2477         struct ixgbe_tx_entry *txe = txq->sw_ring;
2478         uint16_t prev, i;
2479
2480         /* Zero out HW ring memory */
2481         for (i = 0; i < txq->nb_tx_desc; i++) {
2482                 txq->tx_ring[i] = zeroed_desc;
2483         }
2484
2485         /* Initialize SW ring entries */
2486         prev = (uint16_t) (txq->nb_tx_desc - 1);
2487         for (i = 0; i < txq->nb_tx_desc; i++) {
2488                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2489
2490                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2491                 txe[i].mbuf = NULL;
2492                 txe[i].last_id = i;
2493                 txe[prev].next_id = i;
2494                 prev = i;
2495         }
2496
2497         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2498         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2499
2500         txq->tx_tail = 0;
2501         txq->nb_tx_used = 0;
2502         /*
2503          * Always allow 1 descriptor to be un-allocated to avoid
2504          * a H/W race condition
2505          */
2506         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2507         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2508         txq->ctx_curr = 0;
2509         memset((void *)&txq->ctx_cache, 0,
2510                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2511 }
2512
2513 static const struct ixgbe_txq_ops def_txq_ops = {
2514         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2515         .free_swring = ixgbe_tx_free_swring,
2516         .reset = ixgbe_reset_tx_queue,
2517 };
2518
2519 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2520  * the queue parameters. Used in tx_queue_setup by primary process and then
2521  * in dev_init by secondary process when attaching to an existing ethdev.
2522  */
2523 void __rte_cold
2524 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2525 {
2526         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2527         if ((txq->offloads == 0) &&
2528 #ifdef RTE_LIB_SECURITY
2529                         !(txq->using_ipsec) &&
2530 #endif
2531                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2532                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2533                 dev->tx_pkt_prepare = NULL;
2534                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2535                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2536                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2537                                         ixgbe_txq_vec_setup(txq) == 0)) {
2538                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2539                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2540                 } else
2541                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2542         } else {
2543                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2544                 PMD_INIT_LOG(DEBUG,
2545                                 " - offloads = 0x%" PRIx64,
2546                                 txq->offloads);
2547                 PMD_INIT_LOG(DEBUG,
2548                                 " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2549                                 (unsigned long)txq->tx_rs_thresh,
2550                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2551                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2552                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2553         }
2554 }
2555
2556 uint64_t
2557 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2558 {
2559         RTE_SET_USED(dev);
2560
2561         return 0;
2562 }
2563
2564 uint64_t
2565 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2566 {
2567         uint64_t tx_offload_capa;
2568         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2569
2570         tx_offload_capa =
2571                 DEV_TX_OFFLOAD_VLAN_INSERT |
2572                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2573                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2574                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2575                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2576                 DEV_TX_OFFLOAD_TCP_TSO     |
2577                 DEV_TX_OFFLOAD_MULTI_SEGS;
2578
2579         if (hw->mac.type == ixgbe_mac_82599EB ||
2580             hw->mac.type == ixgbe_mac_X540)
2581                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2582
2583         if (hw->mac.type == ixgbe_mac_X550 ||
2584             hw->mac.type == ixgbe_mac_X550EM_x ||
2585             hw->mac.type == ixgbe_mac_X550EM_a)
2586                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2587
2588 #ifdef RTE_LIB_SECURITY
2589         if (dev->security_ctx)
2590                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2591 #endif
2592         return tx_offload_capa;
2593 }
2594
2595 int __rte_cold
2596 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2597                          uint16_t queue_idx,
2598                          uint16_t nb_desc,
2599                          unsigned int socket_id,
2600                          const struct rte_eth_txconf *tx_conf)
2601 {
2602         const struct rte_memzone *tz;
2603         struct ixgbe_tx_queue *txq;
2604         struct ixgbe_hw     *hw;
2605         uint16_t tx_rs_thresh, tx_free_thresh;
2606         uint64_t offloads;
2607
2608         PMD_INIT_FUNC_TRACE();
2609         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2610
2611         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2612
2613         /*
2614          * Validate number of transmit descriptors.
2615          * It must not exceed the hardware maximum, and must be a multiple
2616          * of IXGBE_ALIGN.
2617          */
2618         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2619                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2620                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2621                 return -EINVAL;
2622         }
2623
2624         /*
2625          * The following two parameters control the setting of the RS bit on
2626          * transmit descriptors.
2627          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2628          * descriptors have been used.
2629          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2630          * descriptors are used or if the number of descriptors required
2631          * to transmit a packet is greater than the number of free TX
2632          * descriptors.
2633          * The following constraints must be satisfied:
2634          *  tx_rs_thresh must be greater than 0.
2635          *  tx_rs_thresh must be less than the size of the ring minus 2.
2636          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2637          *  tx_rs_thresh must be a divisor of the ring size.
2638          *  tx_free_thresh must be greater than 0.
2639          *  tx_free_thresh must be less than the size of the ring minus 3.
2640          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2641          * One descriptor in the TX ring is used as a sentinel to avoid a
2642          * H/W race condition, hence the maximum threshold constraints.
2643          * When set to zero use default values.
2644          */
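             /*
              * Illustrative example: a 512-descriptor ring with
              * tx_rs_thresh = 32 and tx_free_thresh = 32 satisfies every
              * constraint above (32 <= 32, 32 < 510, 32 < 509,
              * 512 % 32 == 0 and 32 + 32 <= 512).
              */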
2645         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2646                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2647         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2648         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2649                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2650         if (tx_conf->tx_rs_thresh > 0)
2651                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2652         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2653                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2654                              "exceed nb_desc. (tx_rs_thresh=%u "
2655                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2656                              (unsigned int)tx_rs_thresh,
2657                              (unsigned int)tx_free_thresh,
2658                              (unsigned int)nb_desc,
2659                              (int)dev->data->port_id,
2660                              (int)queue_idx);
2661                 return -(EINVAL);
2662         }
2663         if (tx_rs_thresh >= (nb_desc - 2)) {
2664                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2665                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2666                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2667                         (int)dev->data->port_id, (int)queue_idx);
2668                 return -(EINVAL);
2669         }
2670         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2671                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2672                         "(tx_rs_thresh=%u port=%d queue=%d)",
2673                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2674                         (int)dev->data->port_id, (int)queue_idx);
2675                 return -(EINVAL);
2676         }
2677         if (tx_free_thresh >= (nb_desc - 3)) {
2678                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2679                              "number of TX descriptors minus 3. "
2680                              "(tx_free_thresh=%u "
2681                              "port=%d queue=%d)",
2682                              (unsigned int)tx_free_thresh,
2683                              (int)dev->data->port_id, (int)queue_idx);
2684                 return -(EINVAL);
2685         }
2686         if (tx_rs_thresh > tx_free_thresh) {
2687                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2688                              "tx_free_thresh. (tx_free_thresh=%u "
2689                              "tx_rs_thresh=%u port=%d queue=%d)",
2690                              (unsigned int)tx_free_thresh,
2691                              (unsigned int)tx_rs_thresh,
2692                              (int)dev->data->port_id,
2693                              (int)queue_idx);
2694                 return -(EINVAL);
2695         }
2696         if ((nb_desc % tx_rs_thresh) != 0) {
2697                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2698                              "number of TX descriptors. (tx_rs_thresh=%u "
2699                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2700                              (int)dev->data->port_id, (int)queue_idx);
2701                 return -(EINVAL);
2702         }
2703
2704         /*
2705          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2706          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2707          * by the NIC and all descriptors are written back after the NIC
2708          * accumulates WTHRESH descriptors.
2709          */
2710         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2711                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2712                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2713                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2714                              (int)dev->data->port_id, (int)queue_idx);
2715                 return -(EINVAL);
2716         }
2717
2718         /* Free memory prior to re-allocation if needed... */
2719         if (dev->data->tx_queues[queue_idx] != NULL) {
2720                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2721                 dev->data->tx_queues[queue_idx] = NULL;
2722         }
2723
2724         /* First allocate the tx queue data structure */
2725         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2726                                  RTE_CACHE_LINE_SIZE, socket_id);
2727         if (txq == NULL)
2728                 return -ENOMEM;
2729
2730         /*
2731          * Allocate TX ring hardware descriptors. A memzone large enough to
2732          * handle the maximum ring size is allocated in order to allow for
2733          * resizing in later calls to the queue setup function.
2734          */
2735         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2736                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2737                         IXGBE_ALIGN, socket_id);
2738         if (tz == NULL) {
2739                 ixgbe_tx_queue_release(txq);
2740                 return -ENOMEM;
2741         }
2742
2743         txq->nb_tx_desc = nb_desc;
2744         txq->tx_rs_thresh = tx_rs_thresh;
2745         txq->tx_free_thresh = tx_free_thresh;
2746         txq->pthresh = tx_conf->tx_thresh.pthresh;
2747         txq->hthresh = tx_conf->tx_thresh.hthresh;
2748         txq->wthresh = tx_conf->tx_thresh.wthresh;
2749         txq->queue_id = queue_idx;
2750         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2751                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2752         txq->port_id = dev->data->port_id;
2753         txq->offloads = offloads;
2754         txq->ops = &def_txq_ops;
2755         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2756 #ifdef RTE_LIB_SECURITY
2757         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2758                         DEV_TX_OFFLOAD_SECURITY);
2759 #endif
2760
2761         /*
2762          * Modification to set VFTDT for virtual function if vf is detected
2763          */
2764         if (hw->mac.type == ixgbe_mac_82599_vf ||
2765             hw->mac.type == ixgbe_mac_X540_vf ||
2766             hw->mac.type == ixgbe_mac_X550_vf ||
2767             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2768             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2769                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2770         else
2771                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2772
2773         txq->tx_ring_phys_addr = tz->iova;
2774         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2775
2776         /* Allocate software ring */
2777         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2778                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2779                                 RTE_CACHE_LINE_SIZE, socket_id);
2780         if (txq->sw_ring == NULL) {
2781                 ixgbe_tx_queue_release(txq);
2782                 return -ENOMEM;
2783         }
2784         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2785                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2786
2787         /* set up vector or scalar TX function as appropriate */
2788         ixgbe_set_tx_function(dev, txq);
2789
2790         txq->ops->reset(txq);
2791
2792         dev->data->tx_queues[queue_idx] = txq;
2793
2794
2795         return 0;
2796 }
2797
2798 /**
2799  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2800  *
2801  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2802  * in the sw_rsc_ring is not set to NULL but rather points to the next
2803  * mbuf of this RSC aggregation (that has not been completed yet and still
2804  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2805  * just free the first "nb_segs" segments of the cluster explicitly by calling
2806  * rte_pktmbuf_free_seg().
2807  *
2808  * @m scattered cluster head
2809  */
2810 static void __rte_cold
2811 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2812 {
2813         uint16_t i, nb_segs = m->nb_segs;
2814         struct rte_mbuf *next_seg;
2815
2816         for (i = 0; i < nb_segs; i++) {
2817                 next_seg = m->next;
2818                 rte_pktmbuf_free_seg(m);
2819                 m = next_seg;
2820         }
2821 }
2822
2823 static void __rte_cold
2824 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2825 {
2826         unsigned i;
2827
2828         /* SSE Vector driver has a different way of releasing mbufs. */
2829         if (rxq->rx_using_sse) {
2830                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2831                 return;
2832         }
2833
2834         if (rxq->sw_ring != NULL) {
2835                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2836                         if (rxq->sw_ring[i].mbuf != NULL) {
2837                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2838                                 rxq->sw_ring[i].mbuf = NULL;
2839                         }
2840                 }
2841                 if (rxq->rx_nb_avail) {
2842                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2843                                 struct rte_mbuf *mb;
2844
2845                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2846                                 rte_pktmbuf_free_seg(mb);
2847                         }
2848                         rxq->rx_nb_avail = 0;
2849                 }
2850         }
2851
2852         if (rxq->sw_sc_ring)
2853                 for (i = 0; i < rxq->nb_rx_desc; i++)
2854                         if (rxq->sw_sc_ring[i].fbuf) {
2855                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2856                                 rxq->sw_sc_ring[i].fbuf = NULL;
2857                         }
2858 }
2859
2860 static void __rte_cold
2861 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2862 {
2863         if (rxq != NULL) {
2864                 ixgbe_rx_queue_release_mbufs(rxq);
2865                 rte_free(rxq->sw_ring);
2866                 rte_free(rxq->sw_sc_ring);
2867                 rte_free(rxq);
2868         }
2869 }
2870
2871 void __rte_cold
2872 ixgbe_dev_rx_queue_release(void *rxq)
2873 {
2874         ixgbe_rx_queue_release(rxq);
2875 }
2876
2877 /*
2878  * Check if Rx Burst Bulk Alloc function can be used.
2879  * Return
2880  *        0: the preconditions are satisfied and the bulk allocation function
2881  *           can be used.
2882  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2883  *           function must be used.
2884  */
2885 static inline int __rte_cold
2886 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2887 {
2888         int ret = 0;
2889
2890         /*
2891          * Make sure the following pre-conditions are satisfied:
2892          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2893          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2894          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2895          * Scattered packets are not supported.  This should be checked
2896          * outside of this function.
2897          */
2898         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2899                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2900                              "rxq->rx_free_thresh=%d, "
2901                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2902                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2903                 ret = -EINVAL;
2904         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2905                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2906                              "rxq->rx_free_thresh=%d, "
2907                              "rxq->nb_rx_desc=%d",
2908                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2909                 ret = -EINVAL;
2910         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2911                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2912                              "rxq->nb_rx_desc=%d, "
2913                              "rxq->rx_free_thresh=%d",
2914                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2915                 ret = -EINVAL;
2916         }
2917
2918         return ret;
2919 }
2920
2921 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2922 static void __rte_cold
2923 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2924 {
2925         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2926         unsigned i;
2927         uint16_t len = rxq->nb_rx_desc;
2928
2929         /*
2930          * By default, the Rx queue setup function allocates enough memory for
2931          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2932          * extra memory at the end of the descriptor ring to be zero'd out.
2933          */
2934         if (adapter->rx_bulk_alloc_allowed)
2935                 /* zero out extra memory */
2936                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2937
2938         /*
2939          * Zero out HW ring memory. Zero out extra memory at the end of
2940          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2941          * reads extra memory as zeros.
2942          */
2943         for (i = 0; i < len; i++) {
2944                 rxq->rx_ring[i] = zeroed_desc;
2945         }
2946
2947         /*
2948          * initialize extra software ring entries. Space for these extra
2949          * entries is always allocated
2950          */
2951         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2952         for (i = rxq->nb_rx_desc; i < len; ++i) {
2953                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2954         }
2955
2956         rxq->rx_nb_avail = 0;
2957         rxq->rx_next_avail = 0;
2958         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2959         rxq->rx_tail = 0;
2960         rxq->nb_rx_hold = 0;
2961         rxq->pkt_first_seg = NULL;
2962         rxq->pkt_last_seg = NULL;
2963
2964 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2965         rxq->rxrearm_start = 0;
2966         rxq->rxrearm_nb = 0;
2967 #endif
2968 }
2969
2970 static int
2971 ixgbe_is_vf(struct rte_eth_dev *dev)
2972 {
2973         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2974
2975         switch (hw->mac.type) {
2976         case ixgbe_mac_82599_vf:
2977         case ixgbe_mac_X540_vf:
2978         case ixgbe_mac_X550_vf:
2979         case ixgbe_mac_X550EM_x_vf:
2980         case ixgbe_mac_X550EM_a_vf:
2981                 return 1;
2982         default:
2983                 return 0;
2984         }
2985 }
2986
2987 uint64_t
2988 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2989 {
2990         uint64_t offloads = 0;
2991         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2992
2993         if (hw->mac.type != ixgbe_mac_82598EB)
2994                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2995
2996         return offloads;
2997 }
2998
2999 uint64_t
3000 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3001 {
3002         uint64_t offloads;
3003         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3004
3005         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
3006                    DEV_RX_OFFLOAD_UDP_CKSUM   |
3007                    DEV_RX_OFFLOAD_TCP_CKSUM   |
3008                    DEV_RX_OFFLOAD_KEEP_CRC    |
3009                    DEV_RX_OFFLOAD_JUMBO_FRAME |
3010                    DEV_RX_OFFLOAD_VLAN_FILTER |
3011                    DEV_RX_OFFLOAD_SCATTER |
3012                    DEV_RX_OFFLOAD_RSS_HASH;
3013
3014         if (hw->mac.type == ixgbe_mac_82598EB)
3015                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3016
3017         if (ixgbe_is_vf(dev) == 0)
3018                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
3019
3020         /*
3021          * RSC is only supported by 82599, x540 and x550 PF devices in a
3022          * non-SR-IOV mode.
3023          */
3024         if ((hw->mac.type == ixgbe_mac_82599EB ||
3025              hw->mac.type == ixgbe_mac_X540 ||
3026              hw->mac.type == ixgbe_mac_X550) &&
3027             !RTE_ETH_DEV_SRIOV(dev).active)
3028                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3029
3030         if (hw->mac.type == ixgbe_mac_82599EB ||
3031             hw->mac.type == ixgbe_mac_X540)
3032                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3033
3034         if (hw->mac.type == ixgbe_mac_X550 ||
3035             hw->mac.type == ixgbe_mac_X550EM_x ||
3036             hw->mac.type == ixgbe_mac_X550EM_a)
3037                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3038
3039 #ifdef RTE_LIB_SECURITY
3040         if (dev->security_ctx)
3041                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3042 #endif
3043
3044         return offloads;
3045 }
3046
3047 int __rte_cold
3048 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3049                          uint16_t queue_idx,
3050                          uint16_t nb_desc,
3051                          unsigned int socket_id,
3052                          const struct rte_eth_rxconf *rx_conf,
3053                          struct rte_mempool *mp)
3054 {
3055         const struct rte_memzone *rz;
3056         struct ixgbe_rx_queue *rxq;
3057         struct ixgbe_hw     *hw;
3058         uint16_t len;
3059         struct ixgbe_adapter *adapter = dev->data->dev_private;
3060         uint64_t offloads;
3061
3062         PMD_INIT_FUNC_TRACE();
3063         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3064
3065         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3066
3067         /*
3068          * Validate number of receive descriptors.
3069          * It must not exceed the hardware maximum, and must be a multiple
3070          * of IXGBE_ALIGN.
3071          */
3072         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3073                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3074                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3075                 return -EINVAL;
3076         }
3077
3078         /* Free memory prior to re-allocation if needed... */
3079         if (dev->data->rx_queues[queue_idx] != NULL) {
3080                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3081                 dev->data->rx_queues[queue_idx] = NULL;
3082         }
3083
3084         /* First allocate the rx queue data structure */
3085         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3086                                  RTE_CACHE_LINE_SIZE, socket_id);
3087         if (rxq == NULL)
3088                 return -ENOMEM;
3089         rxq->mb_pool = mp;
3090         rxq->nb_rx_desc = nb_desc;
3091         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3092         rxq->queue_id = queue_idx;
3093         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3094                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3095         rxq->port_id = dev->data->port_id;
3096         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3097                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3098         else
3099                 rxq->crc_len = 0;
3100         rxq->drop_en = rx_conf->rx_drop_en;
3101         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3102         rxq->offloads = offloads;
3103
3104         /*
3105          * The packet type in RX descriptor is different for different NICs.
3106          * Some bits are used for x550 but reserved for other NICs.
3107          * So set different masks for different NICs.
3108          */
3109         if (hw->mac.type == ixgbe_mac_X550 ||
3110             hw->mac.type == ixgbe_mac_X550EM_x ||
3111             hw->mac.type == ixgbe_mac_X550EM_a ||
3112             hw->mac.type == ixgbe_mac_X550_vf ||
3113             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3114             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3115                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3116         else
3117                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3118
3119         /*
3120          * Allocate RX ring hardware descriptors. A memzone large enough to
3121          * handle the maximum ring size is allocated in order to allow for
3122          * resizing in later calls to the queue setup function.
3123          */
3124         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3125                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3126         if (rz == NULL) {
3127                 ixgbe_rx_queue_release(rxq);
3128                 return -ENOMEM;
3129         }
3130
3131         /*
3132          * Zero init all the descriptors in the ring.
3133          */
3134         memset(rz->addr, 0, RX_RING_SZ);
3135
3136         /*
3137          * Modified to setup VFRDT for Virtual Function
3138          */
3139         if (hw->mac.type == ixgbe_mac_82599_vf ||
3140             hw->mac.type == ixgbe_mac_X540_vf ||
3141             hw->mac.type == ixgbe_mac_X550_vf ||
3142             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3143             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3144                 rxq->rdt_reg_addr =
3145                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3146                 rxq->rdh_reg_addr =
3147                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3148         } else {
3149                 rxq->rdt_reg_addr =
3150                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3151                 rxq->rdh_reg_addr =
3152                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3153         }
3154
3155         rxq->rx_ring_phys_addr = rz->iova;
3156         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3157
3158         /*
3159          * Certain constraints must be met in order to use the bulk buffer
3160          * allocation Rx burst function. If any of the Rx queues doesn't meet them,
3161          * the feature should be disabled for the whole port.
3162          */
3163         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3164                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3165                                     "preconditions - canceling the feature for "
3166                                     "the whole port[%d]",
3167                              rxq->queue_id, rxq->port_id);
3168                 adapter->rx_bulk_alloc_allowed = false;
3169         }
3170
3171         /*
3172          * Allocate software ring. Allow for space at the end of the
3173          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3174          * function does not access an invalid memory region.
3175          */
3176         len = nb_desc;
3177         if (adapter->rx_bulk_alloc_allowed)
3178                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3179
3180         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3181                                           sizeof(struct ixgbe_rx_entry) * len,
3182                                           RTE_CACHE_LINE_SIZE, socket_id);
3183         if (!rxq->sw_ring) {
3184                 ixgbe_rx_queue_release(rxq);
3185                 return -ENOMEM;
3186         }
3187
3188         /*
3189          * Always allocate even if it's not going to be needed in order to
3190          * simplify the code.
3191          *
3192          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3193          * be requested in ixgbe_dev_rx_init(), which is called later from
3194          * dev_start() flow.
3195          */
3196         rxq->sw_sc_ring =
3197                 rte_zmalloc_socket("rxq->sw_sc_ring",
3198                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3199                                    RTE_CACHE_LINE_SIZE, socket_id);
3200         if (!rxq->sw_sc_ring) {
3201                 ixgbe_rx_queue_release(rxq);
3202                 return -ENOMEM;
3203         }
3204
3205         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3206                             "dma_addr=0x%"PRIx64,
3207                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3208                      rxq->rx_ring_phys_addr);
3209
3210         if (!rte_is_power_of_2(nb_desc)) {
3211                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3212                                     "preconditions - canceling the feature for "
3213                                     "the whole port[%d]",
3214                              rxq->queue_id, rxq->port_id);
3215                 adapter->rx_vec_allowed = false;
3216         } else
3217                 ixgbe_rxq_vec_setup(rxq);
3218
3219         dev->data->rx_queues[queue_idx] = rxq;
3220
3221         ixgbe_reset_rx_queue(adapter, rxq);
3222
3223         return 0;
3224 }
3225
3226 uint32_t
3227 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3228 {
3229 #define IXGBE_RXQ_SCAN_INTERVAL 4
3230         volatile union ixgbe_adv_rx_desc *rxdp;
3231         struct ixgbe_rx_queue *rxq;
3232         uint32_t desc = 0;
3233
3234         rxq = dev->data->rx_queues[rx_queue_id];
3235         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3236
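             /*
              * Note: the returned count is an estimate; the ring is probed in
              * steps of IXGBE_RXQ_SCAN_INTERVAL descriptors rather than one
              * descriptor at a time.
              */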
3237         while ((desc < rxq->nb_rx_desc) &&
3238                 (rxdp->wb.upper.status_error &
3239                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3240                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3241                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3242                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3243                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3244                                 desc - rxq->nb_rx_desc]);
3245         }
3246
3247         return desc;
3248 }
3249
3250 int
3251 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3252 {
3253         volatile union ixgbe_adv_rx_desc *rxdp;
3254         struct ixgbe_rx_queue *rxq = rx_queue;
3255         uint32_t desc;
3256
3257         if (unlikely(offset >= rxq->nb_rx_desc))
3258                 return 0;
3259         desc = rxq->rx_tail + offset;
3260         if (desc >= rxq->nb_rx_desc)
3261                 desc -= rxq->nb_rx_desc;
3262
3263         rxdp = &rxq->rx_ring[desc];
3264         return !!(rxdp->wb.upper.status_error &
3265                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3266 }
3267
3268 int
3269 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3270 {
3271         struct ixgbe_rx_queue *rxq = rx_queue;
3272         volatile uint32_t *status;
3273         uint32_t nb_hold, desc;
3274
3275         if (unlikely(offset >= rxq->nb_rx_desc))
3276                 return -EINVAL;
3277
3278 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3279         if (rxq->rx_using_sse)
3280                 nb_hold = rxq->rxrearm_nb;
3281         else
3282 #endif
3283                 nb_hold = rxq->nb_rx_hold;
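             /*
              * Descriptors that were already processed but not yet refilled
              * (tracked by nb_rx_hold, or rxrearm_nb for the vector path)
              * cannot report a valid status.
              */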
3284         if (offset >= rxq->nb_rx_desc - nb_hold)
3285                 return RTE_ETH_RX_DESC_UNAVAIL;
3286
3287         desc = rxq->rx_tail + offset;
3288         if (desc >= rxq->nb_rx_desc)
3289                 desc -= rxq->nb_rx_desc;
3290
3291         status = &rxq->rx_ring[desc].wb.upper.status_error;
3292         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3293                 return RTE_ETH_RX_DESC_DONE;
3294
3295         return RTE_ETH_RX_DESC_AVAIL;
3296 }
3297
3298 int
3299 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3300 {
3301         struct ixgbe_tx_queue *txq = tx_queue;
3302         volatile uint32_t *status;
3303         uint32_t desc;
3304
3305         if (unlikely(offset >= txq->nb_tx_desc))
3306                 return -EINVAL;
3307
3308         desc = txq->tx_tail + offset;
3309         /* go to next desc that has the RS bit */
3310         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3311                 txq->tx_rs_thresh;
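             /*
              * e.g. with tx_rs_thresh = 32 and tx_tail + offset = 40, the
              * write-back status of descriptor 64 is checked
              * ((40 + 31) / 32 * 32 = 64).
              */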
3312         if (desc >= txq->nb_tx_desc) {
3313                 desc -= txq->nb_tx_desc;
3314                 if (desc >= txq->nb_tx_desc)
3315                         desc -= txq->nb_tx_desc;
3316         }
3317
3318         status = &txq->tx_ring[desc].wb.status;
3319         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3320                 return RTE_ETH_TX_DESC_DONE;
3321
3322         return RTE_ETH_TX_DESC_FULL;
3323 }
3324
3325 /*
3326  * Set up link loopback for X540/X550 mode Tx->Rx.
3327  */
3328 static inline void __rte_cold
3329 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3330 {
3331         uint32_t macc;
3332         PMD_INIT_FUNC_TRACE();
3333
3334         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3335
3336         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3337                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3338         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3339
3340         if (enable) {
3341                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3342                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3343                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3344                 macc |= IXGBE_MACC_FLU;
3345         } else {
3346                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3347                 macc &= ~IXGBE_MACC_FLU;
3348         }
3349
3350         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3351                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3352
3353         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3354 }
3355
3356 void __rte_cold
3357 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3358 {
3359         unsigned i;
3360         struct ixgbe_adapter *adapter = dev->data->dev_private;
3361         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3362
3363         PMD_INIT_FUNC_TRACE();
3364
3365         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3366                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3367
3368                 if (txq != NULL) {
3369                         txq->ops->release_mbufs(txq);
3370                         txq->ops->reset(txq);
3371                 }
3372         }
3373
3374         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3375                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3376
3377                 if (rxq != NULL) {
3378                         ixgbe_rx_queue_release_mbufs(rxq);
3379                         ixgbe_reset_rx_queue(adapter, rxq);
3380                 }
3381         }
3382         /* If loopback mode was enabled, reconfigure the link accordingly */
3383         if (dev->data->dev_conf.lpbk_mode != 0) {
3384                 if (hw->mac.type == ixgbe_mac_X540 ||
3385                      hw->mac.type == ixgbe_mac_X550 ||
3386                      hw->mac.type == ixgbe_mac_X550EM_x ||
3387                      hw->mac.type == ixgbe_mac_X550EM_a)
3388                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3389         }
3390 }
3391
3392 void
3393 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3394 {
3395         unsigned i;
3396
3397         PMD_INIT_FUNC_TRACE();
3398
3399         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3400                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3401                 dev->data->rx_queues[i] = NULL;
3402                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3403         }
3404         dev->data->nb_rx_queues = 0;
3405
3406         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3407                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3408                 dev->data->tx_queues[i] = NULL;
3409                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3410         }
3411         dev->data->nb_tx_queues = 0;
3412 }
3413
3414 /*********************************************************************
3415  *
3416  *  Device RX/TX init functions
3417  *
3418  **********************************************************************/
3419
3420 /**
3421  * Receive Side Scaling (RSS)
3422  * See section 7.1.2.8 in the following document:
3423  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3424  *
3425  * Principles:
3426  * The source and destination IP addresses of the IP header and the source
3427  * and destination ports of TCP/UDP headers, if any, of received packets are
3428  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3429  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3430  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3431  * RSS output index which is used as the RX queue index where to store the
3432  * received packets.
3433  * The following output is supplied in the RX write-back descriptor:
3434  *     - 32-bit result of the Microsoft RSS hash function,
3435  *     - 4-bit RSS type field.
3436  */
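     /*
      * Example: a packet whose 32-bit RSS hash is 0x12345678 uses the 7 LSBs
      * (0x78) as the RETA index, and RETA[0x78] supplies the Rx queue that
      * receives the packet.
      */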
3437
3438 /*
3439  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3440  * Used as the default key.
3441  */
3442 static uint8_t rss_intel_key[40] = {
3443         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3444         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3445         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3446         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3447         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3448 };
3449
3450 static void
3451 ixgbe_rss_disable(struct rte_eth_dev *dev)
3452 {
3453         struct ixgbe_hw *hw;
3454         uint32_t mrqc;
3455         uint32_t mrqc_reg;
3456
3457         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3458         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3459         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3460         mrqc &= ~IXGBE_MRQC_RSSEN;
3461         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3462 }
3463
3464 static void
3465 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3466 {
3467         uint8_t  *hash_key;
3468         uint32_t mrqc;
3469         uint32_t rss_key;
3470         uint64_t rss_hf;
3471         uint16_t i;
3472         uint32_t mrqc_reg;
3473         uint32_t rssrk_reg;
3474
3475         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3476         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3477
3478         hash_key = rss_conf->rss_key;
3479         if (hash_key != NULL) {
3480                 /* Fill in RSS hash key */
3481                 for (i = 0; i < 10; i++) {
3482                         rss_key  = hash_key[(i * 4)];
3483                         rss_key |= hash_key[(i * 4) + 1] << 8;
3484                         rss_key |= hash_key[(i * 4) + 2] << 16;
3485                         rss_key |= hash_key[(i * 4) + 3] << 24;
3486                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3487                 }
3488         }
3489
3490         /* Set configured hashing protocols in MRQC register */
3491         rss_hf = rss_conf->rss_hf;
3492         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3493         if (rss_hf & ETH_RSS_IPV4)
3494                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3495         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3496                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3497         if (rss_hf & ETH_RSS_IPV6)
3498                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3499         if (rss_hf & ETH_RSS_IPV6_EX)
3500                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3501         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3502                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3503         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3504                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3505         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3506                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3507         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3508                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3509         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3510                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3511         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3512 }
3513
3514 int
3515 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3516                           struct rte_eth_rss_conf *rss_conf)
3517 {
3518         struct ixgbe_hw *hw;
3519         uint32_t mrqc;
3520         uint64_t rss_hf;
3521         uint32_t mrqc_reg;
3522
3523         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3524
3525         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3526                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3527                         "NIC.");
3528                 return -ENOTSUP;
3529         }
3530         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3531
3532         /*
3533          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3534          *     "RSS enabling cannot be done dynamically while it must be
3535          *      preceded by a software reset"
3536          * Before changing anything, first check that the update RSS operation
3537          * does not attempt to disable RSS, if RSS was enabled at
3538          * initialization time, or does not attempt to enable RSS, if RSS was
3539          * disabled at initialization time.
3540          */
3541         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3542         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3543         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3544                 if (rss_hf != 0) /* Enable RSS */
3545                         return -(EINVAL);
3546                 return 0; /* Nothing to do */
3547         }
3548         /* RSS enabled */
3549         if (rss_hf == 0) /* Disable RSS */
3550                 return -(EINVAL);
3551         ixgbe_hw_rss_hash_set(hw, rss_conf);
3552         return 0;
3553 }
3554
3555 int
3556 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3557                             struct rte_eth_rss_conf *rss_conf)
3558 {
3559         struct ixgbe_hw *hw;
3560         uint8_t *hash_key;
3561         uint32_t mrqc;
3562         uint32_t rss_key;
3563         uint64_t rss_hf;
3564         uint16_t i;
3565         uint32_t mrqc_reg;
3566         uint32_t rssrk_reg;
3567
3568         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3569         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3570         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3571         hash_key = rss_conf->rss_key;
3572         if (hash_key != NULL) {
3573                 /* Return RSS hash key */
3574                 for (i = 0; i < 10; i++) {
3575                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3576                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3577                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3578                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3579                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3580                 }
3581         }
3582
3583         /* Get RSS functions configured in MRQC register */
3584         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3585         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3586                 rss_conf->rss_hf = 0;
3587                 return 0;
3588         }
3589         rss_hf = 0;
3590         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3591                 rss_hf |= ETH_RSS_IPV4;
3592         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3593                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3594         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3595                 rss_hf |= ETH_RSS_IPV6;
3596         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3597                 rss_hf |= ETH_RSS_IPV6_EX;
3598         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3599                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3600         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3601                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3602         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3603                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3604         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3605                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3606         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3607                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3608         rss_conf->rss_hf = rss_hf;
3609         return 0;
3610 }
3611
3612 static void
3613 ixgbe_rss_configure(struct rte_eth_dev *dev)
3614 {
3615         struct rte_eth_rss_conf rss_conf;
3616         struct ixgbe_adapter *adapter;
3617         struct ixgbe_hw *hw;
3618         uint32_t reta;
3619         uint16_t i;
3620         uint16_t j;
3621         uint16_t sp_reta_size;
3622         uint32_t reta_reg;
3623
3624         PMD_INIT_FUNC_TRACE();
3625         adapter = dev->data->dev_private;
3626         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3627
3628         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3629
3630         /*
3631          * Fill in redirection table
3632          * The byte-swap is needed because NIC registers are in
3633          * little-endian order.
3634          */
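             /*
              * e.g. with four Rx queues the table holds the repeating pattern
              * 0,1,2,3; four 8-bit entries are packed per 32-bit RETA register
              * and written out on every fourth iteration.
              */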
3635         if (adapter->rss_reta_updated == 0) {
3636                 reta = 0;
3637                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3638                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3639
3640                         if (j == dev->data->nb_rx_queues)
3641                                 j = 0;
3642                         reta = (reta << 8) | j;
3643                         if ((i & 3) == 3)
3644                                 IXGBE_WRITE_REG(hw, reta_reg,
3645                                                 rte_bswap32(reta));
3646                 }
3647         }
3648
3649         /*
3650          * Configure the RSS key and the RSS protocols used to compute
3651          * the RSS hash of input packets.
3652          */
3653         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3654         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3655                 ixgbe_rss_disable(dev);
3656                 return;
3657         }
3658         if (rss_conf.rss_key == NULL)
3659                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3660         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3661 }
3662
3663 #define NUM_VFTA_REGISTERS 128
3664 #define NIC_RX_BUFFER_SIZE 0x200
3665 #define X550_RX_BUFFER_SIZE 0x180
3666
3667 static void
3668 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3669 {
3670         struct rte_eth_vmdq_dcb_conf *cfg;
3671         struct ixgbe_hw *hw;
3672         enum rte_eth_nb_pools num_pools;
3673         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3674         uint16_t pbsize;
3675         uint8_t nb_tcs; /* number of traffic classes */
3676         int i;
3677
3678         PMD_INIT_FUNC_TRACE();
3679         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3680         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3681         num_pools = cfg->nb_queue_pools;
3682         /* Check we have a valid number of pools */
3683         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3684                 ixgbe_rss_disable(dev);
3685                 return;
3686         }
3687         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3688         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3689
3690         /*
3691          * RXPBSIZE
3692          * split rx buffer up into sections, each for 1 traffic class
3693          */
3694         switch (hw->mac.type) {
3695         case ixgbe_mac_X550:
3696         case ixgbe_mac_X550EM_x:
3697         case ixgbe_mac_X550EM_a:
3698                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3699                 break;
3700         default:
3701                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3702                 break;
3703         }
3704         for (i = 0; i < nb_tcs; i++) {
3705                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3706
3707                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3708                 /* clear 10 bits. */
3709                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3710                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3711         }
3712         /* zero alloc all unused TCs */
3713         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3714                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3715
3716                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3717                 /* clear 10 bits. */
3718                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3719         }
3720
3721         /* MRQC: enable vmdq and dcb */
3722         mrqc = (num_pools == ETH_16_POOLS) ?
3723                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3724         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3725
3726         /* PFVTCTL: turn on virtualisation and set the default pool */
3727         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3728         if (cfg->enable_default_pool) {
3729                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3730         } else {
3731                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3732         }
3733
3734         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3735
3736         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3737         queue_mapping = 0;
3738         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3739                 /*
3740                  * mapping is done with 3 bits per priority,
3741                  * so shift by i*3 each time
3742                  */
3743                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
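             /*
              * e.g. dcb_tc = {0,0,0,0,1,1,1,1} (priorities 4-7 mapped to TC1)
              * yields queue_mapping = 0x00249000.
              */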
3744
3745         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3746
3747         /* RTRPCS: DCB related */
3748         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3749
3750         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3751         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3752         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3753         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3754
3755         /* VFTA - enable all vlan filters */
3756         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3757                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3758         }
3759
3760         /* VFRE: pool enabling for receive - 16 or 32 */
3761         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3762                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3763
3764         /*
3765          * MPSAR - allow pools to read specific mac addresses
3766          * In this case, all pools should be able to read from mac addr 0
3767          */
3768         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3769         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3770
3771         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3772         for (i = 0; i < cfg->nb_pool_maps; i++) {
3773                 /* set vlan id in VF register and set the valid bit */
3774                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3775                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3776                 /*
3777                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3778                  * pools, we only need to use the first half of the register
3779                  * i.e. bits 0-31
3780                  */
3781                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3782         }
3783 }
3784
3785 /**
3786  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3787  * @dev: pointer to eth_dev structure
3788  * @dcb_config: pointer to ixgbe_dcb_config structure
3789  */
3790 static void
3791 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3792                        struct ixgbe_dcb_config *dcb_config)
3793 {
3794         uint32_t reg;
3795         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3796
3797         PMD_INIT_FUNC_TRACE();
3798         if (hw->mac.type != ixgbe_mac_82598EB) {
3799                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3800                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3801                 reg |= IXGBE_RTTDCS_ARBDIS;
3802                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3803
3804                 /* Enable DCB for Tx with 8 TCs */
3805                 if (dcb_config->num_tcs.pg_tcs == 8) {
3806                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3807                 } else {
3808                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3809                 }
3810                 if (dcb_config->vt_mode)
3811                         reg |= IXGBE_MTQC_VT_ENA;
3812                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3813
3814                 /* Enable the Tx desc arbiter */
3815                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3816                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3817                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3818
3819                 /* Enable Security TX Buffer IFG for DCB */
3820                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3821                 reg |= IXGBE_SECTX_DCB;
3822                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3823         }
3824 }
3825
3826 /**
3827  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3828  * @dev: pointer to rte_eth_dev structure
3829  * @dcb_config: pointer to ixgbe_dcb_config structure
3830  */
3831 static void
3832 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3833                         struct ixgbe_dcb_config *dcb_config)
3834 {
3835         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3836                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3837         struct ixgbe_hw *hw =
3838                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3839
3840         PMD_INIT_FUNC_TRACE();
3841         if (hw->mac.type != ixgbe_mac_82598EB)
3842                 /*PF VF Transmit Enable*/
3843                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3844                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3845
3846         /*Configure general DCB TX parameters*/
3847         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3848 }
3849
3850 static void
3851 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3852                         struct ixgbe_dcb_config *dcb_config)
3853 {
3854         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3855                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3856         struct ixgbe_dcb_tc_config *tc;
3857         uint8_t i, j;
3858
3859         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3860         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3861                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3862                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3863         } else {
3864                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3865                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3866         }
3867
3868         /* Initialize User Priority to Traffic Class mapping */
3869         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3870                 tc = &dcb_config->tc_config[j];
3871                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3872         }
3873
3874         /* User Priority to Traffic Class mapping */
3875         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3876                 j = vmdq_rx_conf->dcb_tc[i];
3877                 tc = &dcb_config->tc_config[j];
3878                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3879                                                 (uint8_t)(1 << i);
3880         }
3881 }
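/*
 * Worked example for the up_to_tc_bitmap construction above (hypothetical
 * mapping, not read from a device): with dcb_tc[] = {0, 0, 1, 1, 2, 2, 3, 3},
 * TC0 ends up with bitmap 0x03 (priorities 0-1), TC1 with 0x0C, TC2 with
 * 0x30 and TC3 with 0xC0, while TCs 4-7 keep an empty bitmap.
 */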
3882
3883 static void
3884 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3885                         struct ixgbe_dcb_config *dcb_config)
3886 {
3887         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3888                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3889         struct ixgbe_dcb_tc_config *tc;
3890         uint8_t i, j;
3891
3892         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3893         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3894                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3895                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3896         } else {
3897                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3898                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3899         }
3900
3901         /* Initialize User Priority to Traffic Class mapping */
3902         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3903                 tc = &dcb_config->tc_config[j];
3904                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3905         }
3906
3907         /* User Priority to Traffic Class mapping */
3908         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3909                 j = vmdq_tx_conf->dcb_tc[i];
3910                 tc = &dcb_config->tc_config[j];
3911                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3912                                                 (uint8_t)(1 << i);
3913         }
3914 }
3915
3916 static void
3917 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3918                 struct ixgbe_dcb_config *dcb_config)
3919 {
3920         struct rte_eth_dcb_rx_conf *rx_conf =
3921                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3922         struct ixgbe_dcb_tc_config *tc;
3923         uint8_t i, j;
3924
3925         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3926         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3927
3928         /* Initialize User Priority to Traffic Class mapping */
3929         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3930                 tc = &dcb_config->tc_config[j];
3931                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3932         }
3933
3934         /* User Priority to Traffic Class mapping */
3935         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3936                 j = rx_conf->dcb_tc[i];
3937                 tc = &dcb_config->tc_config[j];
3938                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3939                                                 (uint8_t)(1 << i);
3940         }
3941 }
3942
3943 static void
3944 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3945                 struct ixgbe_dcb_config *dcb_config)
3946 {
3947         struct rte_eth_dcb_tx_conf *tx_conf =
3948                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3949         struct ixgbe_dcb_tc_config *tc;
3950         uint8_t i, j;
3951
3952         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3953         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3954
3955         /* Initialize User Priority to Traffic Class mapping */
3956         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3957                 tc = &dcb_config->tc_config[j];
3958                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3959         }
3960
3961         /* User Priority to Traffic Class mapping */
3962         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3963                 j = tx_conf->dcb_tc[i];
3964                 tc = &dcb_config->tc_config[j];
3965                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3966                                                 (uint8_t)(1 << i);
3967         }
3968 }
3969
3970 /**
3971  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3972  * @dev: pointer to eth_dev structure
3973  * @dcb_config: pointer to ixgbe_dcb_config structure
3974  */
3975 static void
3976 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3977                        struct ixgbe_dcb_config *dcb_config)
3978 {
3979         uint32_t reg;
3980         uint32_t vlanctrl;
3981         uint8_t i;
3982         uint32_t q;
3983         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3984
3985         PMD_INIT_FUNC_TRACE();
3986         /*
3987          * Disable the arbiter before changing parameters
3988          * (always enable recycle mode; WSP)
3989          */
3990         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3991         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3992
3993         if (hw->mac.type != ixgbe_mac_82598EB) {
3994                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3995                 if (dcb_config->num_tcs.pg_tcs == 4) {
3996                         if (dcb_config->vt_mode)
3997                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3998                                         IXGBE_MRQC_VMDQRT4TCEN;
3999                         else {
4000                                 /* whether the mode is DCB or DCB_RSS, just
4001                                  * set MRQE to RSSXTCEN; RSS itself is
4002                                  * controlled by the RSS_FIELD bits
4003                                  */
4004                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4005                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4006                                         IXGBE_MRQC_RTRSS4TCEN;
4007                         }
4008                 }
4009                 if (dcb_config->num_tcs.pg_tcs == 8) {
4010                         if (dcb_config->vt_mode)
4011                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4012                                         IXGBE_MRQC_VMDQRT8TCEN;
4013                         else {
4014                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4015                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4016                                         IXGBE_MRQC_RTRSS8TCEN;
4017                         }
4018                 }
4019
4020                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4021
4022                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4023                         /* Disable drop for all queues in VMDQ mode*/
4024                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4025                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4026                                                 (IXGBE_QDE_WRITE |
4027                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4028                 } else {
4029                         /* Enable drop for all queues in SRIOV mode */
4030                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4031                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4032                                                 (IXGBE_QDE_WRITE |
4033                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4034                                                  IXGBE_QDE_ENABLE));
4035                 }
4036         }
4037
4038         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4039         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4040         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4041         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4042
4043         /* VFTA - enable all vlan filters */
4044         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4045                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4046         }
4047
4048         /*
4049          * Configure Rx packet plane (recycle mode; WSP) and
4050          * enable arbiter
4051          */
4052         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4053         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4054 }
4055
4056 static void
4057 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4058                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4059 {
4060         switch (hw->mac.type) {
4061         case ixgbe_mac_82598EB:
4062                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4063                 break;
4064         case ixgbe_mac_82599EB:
4065         case ixgbe_mac_X540:
4066         case ixgbe_mac_X550:
4067         case ixgbe_mac_X550EM_x:
4068         case ixgbe_mac_X550EM_a:
4069                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4070                                                   tsa, map);
4071                 break;
4072         default:
4073                 break;
4074         }
4075 }
4076
4077 static void
4078 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4079                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4080 {
4081         switch (hw->mac.type) {
4082         case ixgbe_mac_82598EB:
4083                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4084                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4085                 break;
4086         case ixgbe_mac_82599EB:
4087         case ixgbe_mac_X540:
4088         case ixgbe_mac_X550:
4089         case ixgbe_mac_X550EM_x:
4090         case ixgbe_mac_X550EM_a:
4091                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4092                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4093                 break;
4094         default:
4095                 break;
4096         }
4097 }
4098
4099 #define DCB_RX_CONFIG  1
4100 #define DCB_TX_CONFIG  1
4101 #define DCB_TX_PB      1024
4102 /**
4103  * ixgbe_dcb_hw_configure - Enable DCB and configure
4104  * general DCB in VT mode and non-VT mode parameters
4105  * @dev: pointer to rte_eth_dev structure
4106  * @dcb_config: pointer to ixgbe_dcb_config structure
4107  */
4108 static int
4109 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4110                         struct ixgbe_dcb_config *dcb_config)
4111 {
4112         int     ret = 0;
4113         uint8_t i, pfc_en, nb_tcs;
4114         uint16_t pbsize, rx_buffer_size;
4115         uint8_t config_dcb_rx = 0;
4116         uint8_t config_dcb_tx = 0;
4117         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4118         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4119         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4120         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4121         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4122         struct ixgbe_dcb_tc_config *tc;
4123         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4124                 RTE_ETHER_CRC_LEN;
4125         struct ixgbe_hw *hw =
4126                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4127         struct ixgbe_bw_conf *bw_conf =
4128                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4129
4130         switch (dev->data->dev_conf.rxmode.mq_mode) {
4131         case ETH_MQ_RX_VMDQ_DCB:
4132                 dcb_config->vt_mode = true;
4133                 if (hw->mac.type != ixgbe_mac_82598EB) {
4134                         config_dcb_rx = DCB_RX_CONFIG;
4135                         /*
4136                          * get DCB and VT RX configuration parameters
4137                          * from rte_eth_conf
4138                          */
4139                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4140                         /*Configure general VMDQ and DCB RX parameters*/
4141                         ixgbe_vmdq_dcb_configure(dev);
4142                 }
4143                 break;
4144         case ETH_MQ_RX_DCB:
4145         case ETH_MQ_RX_DCB_RSS:
4146                 dcb_config->vt_mode = false;
4147                 config_dcb_rx = DCB_RX_CONFIG;
4148                 /* Get DCB RX configuration parameters from rte_eth_conf */
4149                 ixgbe_dcb_rx_config(dev, dcb_config);
4150                 /*Configure general DCB RX parameters*/
4151                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4152                 break;
4153         default:
4154                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4155                 break;
4156         }
4157         switch (dev->data->dev_conf.txmode.mq_mode) {
4158         case ETH_MQ_TX_VMDQ_DCB:
4159                 dcb_config->vt_mode = true;
4160                 config_dcb_tx = DCB_TX_CONFIG;
4161                 /* get DCB and VT TX configuration parameters
4162                  * from rte_eth_conf
4163                  */
4164                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4165                 /*Configure general VMDQ and DCB TX parameters*/
4166                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4167                 break;
4168
4169         case ETH_MQ_TX_DCB:
4170                 dcb_config->vt_mode = false;
4171                 config_dcb_tx = DCB_TX_CONFIG;
4172                 /*get DCB TX configuration parameters from rte_eth_conf*/
4173                 ixgbe_dcb_tx_config(dev, dcb_config);
4174                 /*Configure general DCB TX parameters*/
4175                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4176                 break;
4177         default:
4178                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4179                 break;
4180         }
4181
4182         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4183         /* Unpack map */
4184         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4185         if (nb_tcs == ETH_4_TCS) {
4186                 /* Avoid mapping unconfigured priorities to TC0 */
4187                 uint8_t j = 4;
4188                 uint8_t mask = 0xFF;
4189
4190                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4191                         mask = (uint8_t)(mask & (~(1 << map[i])));
4192                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4193                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4194                                 map[j++] = i;
4195                         mask >>= 1;
4196                 }
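                /*
                 * Illustration of the fixup above (assumed unpacked map, not
                 * read from hardware): if map[] starts as {0, 1, 2, 3, 0, 0,
                 * 0, 0}, the mask of unused TC indices is 0xF0, so priorities
                 * 4-7 are reassigned to indices 4-7 instead of all landing on
                 * TC0.
                 */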
4197                 /* Re-configure 4 TCs BW */
4198                 for (i = 0; i < nb_tcs; i++) {
4199                         tc = &dcb_config->tc_config[i];
4200                         if (bw_conf->tc_num != nb_tcs)
4201                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4202                                         (uint8_t)(100 / nb_tcs);
4203                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4204                                                 (uint8_t)(100 / nb_tcs);
4205                 }
4206                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4207                         tc = &dcb_config->tc_config[i];
4208                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4209                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4210                 }
4211         } else {
4212                 /* Re-configure 8 TCs BW */
4213                 for (i = 0; i < nb_tcs; i++) {
4214                         tc = &dcb_config->tc_config[i];
4215                         if (bw_conf->tc_num != nb_tcs)
4216                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4217                                         (uint8_t)(100 / nb_tcs + (i & 1));
4218                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4219                                 (uint8_t)(100 / nb_tcs + (i & 1));
4220                 }
4221         }
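        /*
         * Sketch of the arithmetic above for the 8 TC case: 100 / 8 = 12 and
         * the (i & 1) term alternates the shares between 12% and 13%, so the
         * eight bwg_percent values add back up to exactly 100%.
         */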
4222
4223         switch (hw->mac.type) {
4224         case ixgbe_mac_X550:
4225         case ixgbe_mac_X550EM_x:
4226         case ixgbe_mac_X550EM_a:
4227                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4228                 break;
4229         default:
4230                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4231                 break;
4232         }
4233
4234         if (config_dcb_rx) {
4235                 /* Set RX buffer size */
4236                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4237                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4238
4239                 for (i = 0; i < nb_tcs; i++) {
4240                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4241                 }
4242                 /* zero alloc all unused TCs */
4243                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4244                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4245                 }
4246         }
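        /*
         * Example with assumed sizes (illustrative only, not values read from
         * the macros above): if the Rx packet buffer were 512 KB and nb_tcs
         * were 4, each active TC would get a 128 KB RXPBSIZE slice and the
         * remaining entries would be zeroed by the second loop.
         */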
4247         if (config_dcb_tx) {
4248                 /* Only an equally distributed Tx packet buffer
4249                  * strategy is supported.
4250                  */
4251                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4252                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4253
4254                 for (i = 0; i < nb_tcs; i++) {
4255                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4256                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4257                 }
4258                 /* Clear unused TCs, if any, to zero buffer size*/
4259                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4260                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4261                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4262                 }
4263         }
4264
4265         /*Calculates traffic class credits*/
4266         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4267                                 IXGBE_DCB_TX_CONFIG);
4268         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4269                                 IXGBE_DCB_RX_CONFIG);
4270
4271         if (config_dcb_rx) {
4272                 /* Unpack CEE standard containers */
4273                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4274                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4275                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4276                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4277                 /* Configure PG(ETS) RX */
4278                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4279         }
4280
4281         if (config_dcb_tx) {
4282                 /* Unpack CEE standard containers */
4283                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4284                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4285                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4286                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4287                 /* Configure PG(ETS) TX */
4288                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4289         }
4290
4291         /*Configure queue statistics registers*/
4292         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4293
4294         /* Check if the PFC is supported */
4295         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4296                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4297                 for (i = 0; i < nb_tcs; i++) {
4298                         /*
4299                          * If the TC count is 8, the default high_water is 48
4300                          * and the default low_water is 16.
4301                          */
4302                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4303                         hw->fc.low_water[i] = pbsize / 4;
4304                         /* Enable pfc for this TC */
4305                         tc = &dcb_config->tc_config[i];
4306                         tc->pfc = ixgbe_dcb_pfc_enabled;
4307                 }
4308                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4309                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4310                         pfc_en &= 0x0F;
4311                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4312         }
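        /*
         * Numeric check of the watermark formulas, using the values quoted in
         * the comment above: with 8 TCs and a per-TC pbsize of 64 (KB units),
         * high_water = 64 * 3 / 4 = 48 and low_water = 64 / 4 = 16.
         */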
4313
4314         return ret;
4315 }
4316
4317 /**
4318  * ixgbe_configure_dcb - Configure DCB hardware
4319  * @dev: pointer to rte_eth_dev
4320  */
4321 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4322 {
4323         struct ixgbe_dcb_config *dcb_cfg =
4324                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4325         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4326
4327         PMD_INIT_FUNC_TRACE();
4328
4329         /* check support mq_mode for DCB */
4330         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4331             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4332             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4333                 return;
4334
4335         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4336                 return;
4337
4338         /** Configure DCB hardware **/
4339         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4340 }
4341
4342 /*
4343  * VMDq is only supported on 10 GbE NICs.
4344  */
4345 static void
4346 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4347 {
4348         struct rte_eth_vmdq_rx_conf *cfg;
4349         struct ixgbe_hw *hw;
4350         enum rte_eth_nb_pools num_pools;
4351         uint32_t mrqc, vt_ctl, vlanctrl;
4352         uint32_t vmolr = 0;
4353         int i;
4354
4355         PMD_INIT_FUNC_TRACE();
4356         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4357         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4358         num_pools = cfg->nb_queue_pools;
4359
4360         ixgbe_rss_disable(dev);
4361
4362         /* MRQC: enable vmdq */
4363         mrqc = IXGBE_MRQC_VMDQEN;
4364         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4365
4366         /* PFVTCTL: turn on virtualisation and set the default pool */
4367         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4368         if (cfg->enable_default_pool)
4369                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4370         else
4371                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4372
4373         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4374
4375         for (i = 0; i < (int)num_pools; i++) {
4376                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4377                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4378         }
4379
4380         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4381         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4382         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4383         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4384
4385         /* VFTA - enable all vlan filters */
4386         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4387                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4388
4389         /* VFRE: pool enabling for receive - 64 */
4390         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4391         if (num_pools == ETH_64_POOLS)
4392                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4393
4394         /*
4395          * MPSAR - allow pools to read specific mac addresses
4396          * In this case, all pools should be able to read from mac addr 0
4397          */
4398         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4399         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4400
4401         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4402         for (i = 0; i < cfg->nb_pool_maps; i++) {
4403                 /* set vlan id in VF register and set the valid bit */
4404                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4405                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4406                 /*
4407                  * Put the allowed pools in the VLVFB register pair. With up
4408                  * to 64 pools the bitmap spans two 32-bit registers: bits
4409                  * 0-31 go in VLVFB(2*i), bits 32-63 in VLVFB(2*i+1).
4410                  */
4411                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4412                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4413                                         (cfg->pool_map[i].pools & UINT32_MAX));
4414                 else
4415                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4416                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4417
4418         }
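        /*
         * Example of the VLVFB half selection above (hypothetical pool map):
         * pools = 0x0000000000000005 enables pools 0 and 2, so VLVFB(2*i) is
         * written with 0x5; pools = 0x0000000100000000 enables pool 32, so
         * VLVFB(2*i + 1) is written with 0x1 instead.
         */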
4419
4420         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4421         if (cfg->enable_loop_back) {
4422                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4423                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4424                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4425         }
4426
4427         IXGBE_WRITE_FLUSH(hw);
4428 }
4429
4430 /*
4431  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4432  * @hw: pointer to hardware structure
4433  */
4434 static void
4435 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4436 {
4437         uint32_t reg;
4438         uint32_t q;
4439
4440         PMD_INIT_FUNC_TRACE();
4441         /*PF VF Transmit Enable*/
4442         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4443         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4444
4445         /* Disable the Tx desc arbiter so that MTQC can be changed */
4446         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4447         reg |= IXGBE_RTTDCS_ARBDIS;
4448         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4449
4450         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4451         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4452
4453         /* Disable drop for all queues */
4454         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4455                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4456                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4457
4458         /* Enable the Tx desc arbiter */
4459         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4460         reg &= ~IXGBE_RTTDCS_ARBDIS;
4461         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4462
4463         IXGBE_WRITE_FLUSH(hw);
4464 }
4465
4466 static int __rte_cold
4467 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4468 {
4469         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4470         uint64_t dma_addr;
4471         unsigned int i;
4472
4473         /* Initialize software ring entries */
4474         for (i = 0; i < rxq->nb_rx_desc; i++) {
4475                 volatile union ixgbe_adv_rx_desc *rxd;
4476                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4477
4478                 if (mbuf == NULL) {
4479                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4480                                      (unsigned) rxq->queue_id);
4481                         return -ENOMEM;
4482                 }
4483
4484                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4485                 mbuf->port = rxq->port_id;
4486
4487                 dma_addr =
4488                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4489                 rxd = &rxq->rx_ring[i];
4490                 rxd->read.hdr_addr = 0;
4491                 rxd->read.pkt_addr = dma_addr;
4492                 rxe[i].mbuf = mbuf;
4493         }
4494
4495         return 0;
4496 }
4497
4498 static int
4499 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4500 {
4501         struct ixgbe_hw *hw;
4502         uint32_t mrqc;
4503
4504         ixgbe_rss_configure(dev);
4505
4506         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4507
4508         /* MRQC: enable VF RSS */
4509         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4510         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4511         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4512         case ETH_64_POOLS:
4513                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4514                 break;
4515
4516         case ETH_32_POOLS:
4517                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4518                 break;
4519
4520         default:
4521                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4522                 return -EINVAL;
4523         }
4524
4525         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4526
4527         return 0;
4528 }
4529
4530 static int
4531 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4532 {
4533         struct ixgbe_hw *hw =
4534                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4535
4536         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4537         case ETH_64_POOLS:
4538                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4539                         IXGBE_MRQC_VMDQEN);
4540                 break;
4541
4542         case ETH_32_POOLS:
4543                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4544                         IXGBE_MRQC_VMDQRT4TCEN);
4545                 break;
4546
4547         case ETH_16_POOLS:
4548                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4549                         IXGBE_MRQC_VMDQRT8TCEN);
4550                 break;
4551         default:
4552                 PMD_INIT_LOG(ERR,
4553                         "invalid pool number in IOV mode");
4554                 break;
4555         }
4556         return 0;
4557 }
4558
4559 static int
4560 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4561 {
4562         struct ixgbe_hw *hw =
4563                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4564
4565         if (hw->mac.type == ixgbe_mac_82598EB)
4566                 return 0;
4567
4568         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4569                 /*
4570                  * SRIOV inactive scheme
4571                  * any DCB/RSS w/o VMDq multi-queue setting
4572                  */
4573                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4574                 case ETH_MQ_RX_RSS:
4575                 case ETH_MQ_RX_DCB_RSS:
4576                 case ETH_MQ_RX_VMDQ_RSS:
4577                         ixgbe_rss_configure(dev);
4578                         break;
4579
4580                 case ETH_MQ_RX_VMDQ_DCB:
4581                         ixgbe_vmdq_dcb_configure(dev);
4582                         break;
4583
4584                 case ETH_MQ_RX_VMDQ_ONLY:
4585                         ixgbe_vmdq_rx_hw_configure(dev);
4586                         break;
4587
4588                 case ETH_MQ_RX_NONE:
4589                 default:
4590                         /* if mq_mode is none, disable rss mode.*/
4591                         ixgbe_rss_disable(dev);
4592                         break;
4593                 }
4594         } else {
4595                 /* SRIOV active scheme
4596                  * Support RSS together with SRIOV.
4597                  */
4598                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4599                 case ETH_MQ_RX_RSS:
4600                 case ETH_MQ_RX_VMDQ_RSS:
4601                         ixgbe_config_vf_rss(dev);
4602                         break;
4603                 case ETH_MQ_RX_VMDQ_DCB:
4604                 case ETH_MQ_RX_DCB:
4605                 /* In SRIOV, the configuration is the same as VMDq case */
4606                         ixgbe_vmdq_dcb_configure(dev);
4607                         break;
4608                 /* DCB/RSS together with SRIOV is not supported */
4609                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4610                 case ETH_MQ_RX_DCB_RSS:
4611                         PMD_INIT_LOG(ERR,
4612                                 "DCB/RSS is not supported with VMDq & SRIOV");
4613                         return -1;
4614                 default:
4615                         ixgbe_config_vf_default(dev);
4616                         break;
4617                 }
4618         }
4619
4620         return 0;
4621 }
4622
4623 static int
4624 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4625 {
4626         struct ixgbe_hw *hw =
4627                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4628         uint32_t mtqc;
4629         uint32_t rttdcs;
4630
4631         if (hw->mac.type == ixgbe_mac_82598EB)
4632                 return 0;
4633
4634         /* disable arbiter before setting MTQC */
4635         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4636         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4637         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4638
4639         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4640                 /*
4641                  * SRIOV inactive scheme
4642                  * any DCB w/o VMDq multi-queue setting
4643                  */
4644                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4645                         ixgbe_vmdq_tx_hw_configure(hw);
4646                 else {
4647                         mtqc = IXGBE_MTQC_64Q_1PB;
4648                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4649                 }
4650         } else {
4651                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4652
4653                 /*
4654                  * SRIOV active scheme
4655                  * FIXME: DCB together with VMDq & SRIOV is not yet supported
4656                  */
4657                 case ETH_64_POOLS:
4658                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4659                         break;
4660                 case ETH_32_POOLS:
4661                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4662                         break;
4663                 case ETH_16_POOLS:
4664                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4665                                 IXGBE_MTQC_8TC_8TQ;
4666                         break;
4667                 default:
4668                         mtqc = IXGBE_MTQC_64Q_1PB;
4669                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4670                 }
4671                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4672         }
4673
4674         /* re-enable arbiter */
4675         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4676         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4677
4678         return 0;
4679 }
4680
4681 /**
4682  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4683  *
4684  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4685  * spec rev. 3.0 chapter 8.2.3.8.13.
4686  *
4687  * @pool Memory pool of the Rx queue
4688  */
4689 static inline uint32_t
4690 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4691 {
4692         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4693
4694         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4695         uint16_t maxdesc =
4696                 RTE_IPV4_MAX_PKT_LEN /
4697                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4698
4699         if (maxdesc >= 16)
4700                 return IXGBE_RSCCTL_MAXDESC_16;
4701         else if (maxdesc >= 8)
4702                 return IXGBE_RSCCTL_MAXDESC_8;
4703         else if (maxdesc >= 4)
4704                 return IXGBE_RSCCTL_MAXDESC_4;
4705         else
4706                 return IXGBE_RSCCTL_MAXDESC_1;
4707 }
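/*
 * Illustrative use of the helper above (hypothetical pool, a sketch only):
 *
 *   struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 4096, 256, 0,
 *           RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *   uint32_t maxdesc = ixgbe_get_rscctl_maxdesc(mp);
 *
 * With the default 2048-byte data room left after RTE_PKTMBUF_HEADROOM,
 * maxdesc computes to 65535 / 2048 = 31, which is >= 16, so the helper
 * returns IXGBE_RSCCTL_MAXDESC_16.
 */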
4708
4709 /**
4710  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4711  * interrupt
4712  *
4713  * (Taken from FreeBSD tree)
4714  * (yes this is all very magic and confusing :)
4715  *
4716  * @dev port handle
4717  * @entry the register array entry
4718  * @vector the MSIX vector for this queue
4719  * @type RX/TX/MISC
4720  */
4721 static void
4722 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4723 {
4724         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4725         u32 ivar, index;
4726
4727         vector |= IXGBE_IVAR_ALLOC_VAL;
4728
4729         switch (hw->mac.type) {
4730
4731         case ixgbe_mac_82598EB:
4732                 if (type == -1)
4733                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4734                 else
4735                         entry += (type * 64);
4736                 index = (entry >> 2) & 0x1F;
4737                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4738                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4739                 ivar |= (vector << (8 * (entry & 0x3)));
4740                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4741                 break;
4742
4743         case ixgbe_mac_82599EB:
4744         case ixgbe_mac_X540:
4745                 if (type == -1) { /* MISC IVAR */
4746                         index = (entry & 1) * 8;
4747                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4748                         ivar &= ~(0xFF << index);
4749                         ivar |= (vector << index);
4750                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4751                 } else {        /* RX/TX IVARS */
4752                         index = (16 * (entry & 1)) + (8 * type);
4753                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4754                         ivar &= ~(0xFF << index);
4755                         ivar |= (vector << index);
4756                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4757                 }
4758
4759                 break;
4760
4761         default:
4762                 break;
4763         }
4764 }
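/*
 * Illustrative trace of the 82599/X540 branch above (hypothetical inputs):
 * for an Rx queue with entry = 5, vector = 3 and type = 0, the byte offset
 * is index = 16 * (5 & 1) + 8 * 0 = 16, the register is IVAR(5 >> 1) =
 * IVAR(2), and (3 | IXGBE_IVAR_ALLOC_VAL) is written into bits 23:16 of
 * that register.
 */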
4765
4766 void __rte_cold
4767 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4768 {
4769         uint16_t i, rx_using_sse;
4770         struct ixgbe_adapter *adapter = dev->data->dev_private;
4771
4772         /*
4773          * In order to allow Vector Rx there are a few configuration
4774          * conditions to be met and Rx Bulk Allocation should be allowed.
4775          */
4776         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4777             !adapter->rx_bulk_alloc_allowed ||
4778                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4779                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4780                                     "preconditions",
4781                              dev->data->port_id);
4782
4783                 adapter->rx_vec_allowed = false;
4784         }
4785
4786         /*
4787          * Initialize the appropriate LRO callback.
4788          *
4789          * If all queues satisfy the bulk allocation preconditions
4790          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4791          * Otherwise use a single allocation version.
4792          */
4793         if (dev->data->lro) {
4794                 if (adapter->rx_bulk_alloc_allowed) {
4795                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4796                                            "allocation version");
4797                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4798                 } else {
4799                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4800                                            "allocation version");
4801                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4802                 }
4803         } else if (dev->data->scattered_rx) {
4804                 /*
4805                  * Set the non-LRO scattered callback: there are Vector and
4806                  * single allocation versions.
4807                  */
4808                 if (adapter->rx_vec_allowed) {
4809                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4810                                             "callback (port=%d).",
4811                                      dev->data->port_id);
4812
4813                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4814                 } else if (adapter->rx_bulk_alloc_allowed) {
4815                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4816                                            "allocation callback (port=%d).",
4817                                      dev->data->port_id);
4818                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4819                 } else {
4820                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4821                                             "single allocation) "
4822                                             "Scattered Rx callback "
4823                                             "(port=%d).",
4824                                      dev->data->port_id);
4825
4826                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4827                 }
4828         /*
4829          * Below we set "simple" callbacks according to port/queues parameters.
4830          * If parameters allow we are going to choose between the following
4831          * callbacks:
4832          *    - Vector
4833          *    - Bulk Allocation
4834          *    - Single buffer allocation (the simplest one)
4835          */
4836         } else if (adapter->rx_vec_allowed) {
4837                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4838                                     "burst size is no less than %d (port=%d).",
4839                              RTE_IXGBE_DESCS_PER_LOOP,
4840                              dev->data->port_id);
4841
4842                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4843         } else if (adapter->rx_bulk_alloc_allowed) {
4844                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4845                                     "satisfied. Rx Burst Bulk Alloc function "
4846                                     "will be used on port=%d.",
4847                              dev->data->port_id);
4848
4849                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4850         } else {
4851                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4852                                     "satisfied, or Scattered Rx is requested "
4853                                     "(port=%d).",
4854                              dev->data->port_id);
4855
4856                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4857         }
4858
4859         /* Propagate information about RX function choice through all queues. */
4860
4861         rx_using_sse =
4862                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4863                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4864
4865         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4866                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4867
4868                 rxq->rx_using_sse = rx_using_sse;
4869 #ifdef RTE_LIB_SECURITY
4870                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4871                                 DEV_RX_OFFLOAD_SECURITY);
4872 #endif
4873         }
4874 }
4875
4876 /**
4877  * ixgbe_set_rsc - configure RSC related port HW registers
4878  *
4879  * Configures the port's RSC related registers according to chapter 4.6.7.2
4880  * of the 82599 Spec (x540 configuration is virtually the same).
4881  *
4882  * @dev port handle
4883  *
4884  * Returns 0 in case of success or a non-zero error code
4885  */
4886 static int
4887 ixgbe_set_rsc(struct rte_eth_dev *dev)
4888 {
4889         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4890         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4891         struct rte_eth_dev_info dev_info = { 0 };
4892         bool rsc_capable = false;
4893         uint16_t i;
4894         uint32_t rdrxctl;
4895         uint32_t rfctl;
4896
4897         /* Sanity check */
4898         dev->dev_ops->dev_infos_get(dev, &dev_info);
4899         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4900                 rsc_capable = true;
4901
4902         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4903                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4904                                    "support it");
4905                 return -EINVAL;
4906         }
4907
4908         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4909
4910         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4911              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4912                 /*
4913                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4914                  * RSC configuration requires HW CRC stripping to be
4915                  * enabled. If the user requested both HW CRC stripping
4916                  * off and RSC on, return an error.
4917                  */
4918                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4919                                     "is disabled");
4920                 return -EINVAL;
4921         }
4922
4923         /* RFCTL configuration  */
4924         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4925         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4926                 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4927         else
4928                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4929         /* disable NFS filtering */
4930         rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4931         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4932
4933         /* If LRO hasn't been requested - we are done here. */
4934         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4935                 return 0;
4936
4937         /* Set RDRXCTL.RSCACKC bit */
4938         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4939         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4940         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4941
4942         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4943         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4944                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4945                 uint32_t srrctl =
4946                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4947                 uint32_t rscctl =
4948                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4949                 uint32_t psrtype =
4950                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4951                 uint32_t eitr =
4952                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4953
4954                 /*
4955                  * ixgbe PMD doesn't support header-split at the moment.
4956                  *
4957                  * Following chapter 4.6.7.2.1 of the 82599/x540
4958                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4959                  * should be configured even if header split is not
4960                  * enabled. We configure it to 128 bytes, following the
4961                  * recommendation in the spec.
4962                  */
4963                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4964                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4965                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4966
4967                 /*
4968                  * TODO: Consider setting the Receive Descriptor Minimum
4969                  * Threshold Size for an RSC case. This is not an obviously
4970                  * beneficial option but one worth considering...
4971                  */
4972
4973                 rscctl |= IXGBE_RSCCTL_RSCEN;
4974                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4975                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4976
4977                 /*
4978                  * RSC: Set ITR interval corresponding to 2K ints/s.
4979                  *
4980                  * Full-sized RSC aggregations for a 10Gb/s link will
4981                  * arrive at about 20K aggregation/s rate.
4982                  *
4983                  * 2K ints/s rate will make only 10% of the
4984                  * aggregations to be closed due to the interrupt timer
4985                  * expiration for a streaming at wire-speed case.
4986                  *
4987                  * For a sparse streaming case this setting will yield
4988                  * at most 500us latency for a single RSC aggregation.
4989                  */
4990                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4991                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4992                 eitr |= IXGBE_EITR_CNT_WDIS;
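                /*
                 * Sanity check of the rate quoted in the comment above: an
                 * interval of roughly 500 us per interrupt corresponds to
                 * 1 / 500 us = 2000 interrupts/s, i.e. the 2K ints/s target.
                 */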
4993
4994                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4995                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4996                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4997                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4998
4999                 /*
5000                  * RSC requires the mapping of the queue to the
5001                  * interrupt vector.
5002                  */
5003                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5004         }
5005
5006         dev->data->lro = 1;
5007
5008         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5009
5010         return 0;
5011 }
5012
5013 /*
5014  * Initializes Receive Unit.
5015  */
5016 int __rte_cold
5017 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5018 {
5019         struct ixgbe_hw     *hw;
5020         struct ixgbe_rx_queue *rxq;
5021         uint64_t bus_addr;
5022         uint32_t rxctrl;
5023         uint32_t fctrl;
5024         uint32_t hlreg0;
5025         uint32_t maxfrs;
5026         uint32_t srrctl;
5027         uint32_t rdrxctl;
5028         uint32_t rxcsum;
5029         uint16_t buf_size;
5030         uint16_t i;
5031         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5032         int rc;
5033
5034         PMD_INIT_FUNC_TRACE();
5035         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5036
5037         /*
5038          * Make sure receives are disabled while setting
5039          * up the RX context (registers, descriptor rings, etc.).
5040          */
5041         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5042         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5043
5044         /* Enable receipt of broadcast frames */
5045         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5046         fctrl |= IXGBE_FCTRL_BAM;
5047         fctrl |= IXGBE_FCTRL_DPF;
5048         fctrl |= IXGBE_FCTRL_PMCF;
5049         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5050
5051         /*
5052          * Configure CRC stripping, if any.
5053          */
5054         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5055         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5056                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5057         else
5058                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5059
5060         /*
5061          * Configure jumbo frame support, if any.
5062          */
5063         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5064                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5065                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5066                 maxfrs &= 0x0000FFFF;
5067                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5068                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5069         } else
5070                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5071
5072         /*
5073          * If loopback mode is configured, set LPBK bit.
5074          */
5075         if (dev->data->dev_conf.lpbk_mode != 0) {
5076                 rc = ixgbe_check_supported_loopback_mode(dev);
5077                 if (rc < 0) {
5078                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5079                         return rc;
5080                 }
5081                 hlreg0 |= IXGBE_HLREG0_LPBK;
5082         } else {
5083                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5084         }
5085
5086         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5087
5088         /*
5089          * Assume no header split and no VLAN strip support
5090          * on any Rx queue first.
5091          */
5092         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5093         /* Setup RX queues */
5094         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5095                 rxq = dev->data->rx_queues[i];
5096
5097                 /*
5098                  * Reset crc_len in case it was changed after queue setup by a
5099                  * call to configure.
5100                  */
5101                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5102                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5103                 else
5104                         rxq->crc_len = 0;
5105
5106                 /* Setup the Base and Length of the Rx Descriptor Rings */
5107                 bus_addr = rxq->rx_ring_phys_addr;
5108                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5109                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5110                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5111                                 (uint32_t)(bus_addr >> 32));
5112                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5113                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5114                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5115                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5116
5117                 /* Configure the SRRCTL register */
5118                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5119
5120                 /* Set if packets are dropped when no descriptors available */
5121                 if (rxq->drop_en)
5122                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5123
5124                 /*
5125                  * Configure the RX buffer size in the BSIZEPACKET field of
5126                  * the SRRCTL register of the queue.
5127                  * The value is in 1 KB resolution. Valid values can be from
5128                  * 1 KB to 16 KB.
5129                  */
5130                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5131                         RTE_PKTMBUF_HEADROOM);
5132                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5133                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5134
5135                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5136
5137                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5138                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
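                /*
                 * Worked example of the BSIZEPACKET math above (a sketch,
                 * assuming the default mbuf layout of RTE_MBUF_DEFAULT_BUF_SIZE
                 * = 2176 bytes of data room and RTE_PKTMBUF_HEADROOM = 128):
                 *
                 *     buf_size    = 2176 - 128 = 2048
                 *     BSIZEPACKET = 2048 >> 10 = 2      (1 KB units)
                 *
                 * and the read-back just above yields 2 << 10 = 2048, i.e. the
                 * hardware Rx buffer size rounded down to a 1 KB multiple.
                 */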
5139
5140                 /* Account for dual VLAN (QinQ) tag length when checking buf_size */
5141                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5142                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5143                         dev->data->scattered_rx = 1;
5144                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5145                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5146         }
5147
5148         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5149                 dev->data->scattered_rx = 1;
5150
5151         /*
5152          * Device configured with multiple RX queues.
5153          */
5154         ixgbe_dev_mq_rx_configure(dev);
5155
5156         /*
5157          * Set up the Checksum Register.
5158          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5159          * Enable IP/L4 checksum computation by hardware if requested to do so.
5160          */
5161         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5162         rxcsum |= IXGBE_RXCSUM_PCSD;
5163         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5164                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5165         else
5166                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5167
5168         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5169
5170         if (hw->mac.type == ixgbe_mac_82599EB ||
5171             hw->mac.type == ixgbe_mac_X540) {
5172                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5173                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5174                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5175                 else
5176                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5177                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5178                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5179         }
5180
5181         rc = ixgbe_set_rsc(dev);
5182         if (rc)
5183                 return rc;
5184
5185         ixgbe_set_rx_function(dev);
5186
5187         return 0;
5188 }
5189
5190 /*
5191  * Initializes Transmit Unit.
5192  */
5193 void __rte_cold
5194 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5195 {
5196         struct ixgbe_hw     *hw;
5197         struct ixgbe_tx_queue *txq;
5198         uint64_t bus_addr;
5199         uint32_t hlreg0;
5200         uint32_t txctrl;
5201         uint16_t i;
5202
5203         PMD_INIT_FUNC_TRACE();
5204         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5205
5206         /* Enable TX CRC (checksum offload requirement) and hw padding
5207          * (TSO requirement)
5208          */
5209         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5210         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5211         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5212
5213         /* Setup the Base and Length of the Tx Descriptor Rings */
5214         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5215                 txq = dev->data->tx_queues[i];
5216
5217                 bus_addr = txq->tx_ring_phys_addr;
5218                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5219                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5220                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5221                                 (uint32_t)(bus_addr >> 32));
5222                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5223                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5224                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5225                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5226                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5227
5228                 /*
5229                  * Disable the Tx Head Writeback relaxed-ordering (RO) bit,
5230                  * since out-of-order writeback breaks the descriptor bookkeeping.
5231                  */
5232                 switch (hw->mac.type) {
5233                 case ixgbe_mac_82598EB:
5234                         txctrl = IXGBE_READ_REG(hw,
5235                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5236                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5237                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5238                                         txctrl);
5239                         break;
5240
5241                 case ixgbe_mac_82599EB:
5242                 case ixgbe_mac_X540:
5243                 case ixgbe_mac_X550:
5244                 case ixgbe_mac_X550EM_x:
5245                 case ixgbe_mac_X550EM_a:
5246                 default:
5247                         txctrl = IXGBE_READ_REG(hw,
5248                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5249                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5250                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5251                                         txctrl);
5252                         break;
5253                 }
5254         }
5255
5256         /* Device configured with multiple TX queues. */
5257         ixgbe_dev_mq_tx_configure(dev);
5258 }
5259
5260 /*
5261  * Check if requested loopback mode is supported
5262  */
5263 int
5264 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5265 {
5266         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5267
5268         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5269                 if (hw->mac.type == ixgbe_mac_82599EB ||
5270                      hw->mac.type == ixgbe_mac_X540 ||
5271                      hw->mac.type == ixgbe_mac_X550 ||
5272                      hw->mac.type == ixgbe_mac_X550EM_x ||
5273                      hw->mac.type == ixgbe_mac_X550EM_a)
5274                         return 0;
5275
5276         return -ENOTSUP;
5277 }
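/*
 * Illustrative usage sketch (not part of the driver; port_id is an assumed
 * variable): an application requests the Tx->Rx loopback checked above by
 * setting lpbk_mode in the device configuration before rte_eth_dev_configure():
 *
 *     struct rte_eth_conf port_conf;
 *
 *     memset(&port_conf, 0, sizeof(port_conf));
 *     port_conf.lpbk_mode = 1;        // 1 == IXGBE_LPBK_TX_RX
 *     if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) != 0)
 *         rte_exit(EXIT_FAILURE, "cannot configure loopback\n");
 */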
5278
5279 /*
5280  * Set up link for 82599 loopback mode Tx->Rx.
5281  */
5282 static inline void __rte_cold
5283 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5284 {
5285         PMD_INIT_FUNC_TRACE();
5286
5287         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5288                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5289                                 IXGBE_SUCCESS) {
5290                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5291                         /* ignore error */
5292                         return;
5293                 }
5294         }
5295
5296         /* Restart link */
5297         IXGBE_WRITE_REG(hw,
5298                         IXGBE_AUTOC,
5299                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5300         ixgbe_reset_pipeline_82599(hw);
5301
5302         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5303         msec_delay(50);
5304 }
5305
5306
5307 /*
5308  * Start Transmit and Receive Units.
5309  */
5310 int __rte_cold
5311 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5312 {
5313         struct ixgbe_hw     *hw;
5314         struct ixgbe_tx_queue *txq;
5315         struct ixgbe_rx_queue *rxq;
5316         uint32_t txdctl;
5317         uint32_t dmatxctl;
5318         uint32_t rxctrl;
5319         uint16_t i;
5320         int ret = 0;
5321
5322         PMD_INIT_FUNC_TRACE();
5323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5324
5325         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5326                 txq = dev->data->tx_queues[i];
5327                 /* Setup Transmit Threshold Registers */
5328                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5329                 txdctl |= txq->pthresh & 0x7F;
5330                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5331                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5332                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5333         }
5334
5335         if (hw->mac.type != ixgbe_mac_82598EB) {
5336                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5337                 dmatxctl |= IXGBE_DMATXCTL_TE;
5338                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5339         }
5340
5341         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5342                 txq = dev->data->tx_queues[i];
5343                 if (!txq->tx_deferred_start) {
5344                         ret = ixgbe_dev_tx_queue_start(dev, i);
5345                         if (ret < 0)
5346                                 return ret;
5347                 }
5348         }
5349
5350         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5351                 rxq = dev->data->rx_queues[i];
5352                 if (!rxq->rx_deferred_start) {
5353                         ret = ixgbe_dev_rx_queue_start(dev, i);
5354                         if (ret < 0)
5355                                 return ret;
5356                 }
5357         }
5358
5359         /* Enable Receive engine */
5360         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5361         if (hw->mac.type == ixgbe_mac_82598EB)
5362                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5363         rxctrl |= IXGBE_RXCTRL_RXEN;
5364         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5365
5366         /* If loopback mode is enabled, set up the link accordingly */
5367         if (dev->data->dev_conf.lpbk_mode != 0) {
5368                 if (hw->mac.type == ixgbe_mac_82599EB)
5369                         ixgbe_setup_loopback_link_82599(hw);
5370                 else if (hw->mac.type == ixgbe_mac_X540 ||
5371                      hw->mac.type == ixgbe_mac_X550 ||
5372                      hw->mac.type == ixgbe_mac_X550EM_x ||
5373                      hw->mac.type == ixgbe_mac_X550EM_a)
5374                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5375         }
5376
5377 #ifdef RTE_LIB_SECURITY
5378         if ((dev->data->dev_conf.rxmode.offloads &
5379                         DEV_RX_OFFLOAD_SECURITY) ||
5380                 (dev->data->dev_conf.txmode.offloads &
5381                         DEV_TX_OFFLOAD_SECURITY)) {
5382                 ret = ixgbe_crypto_enable_ipsec(dev);
5383                 if (ret != 0) {
5384                         PMD_DRV_LOG(ERR,
5385                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5386                                     ret);
5387                         return ret;
5388                 }
5389         }
5390 #endif
5391
5392         return 0;
5393 }
5394
5395 /*
5396  * Start Receive Units for the specified queue.
5397  */
5398 int __rte_cold
5399 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5400 {
5401         struct ixgbe_hw     *hw;
5402         struct ixgbe_rx_queue *rxq;
5403         uint32_t rxdctl;
5404         int poll_ms;
5405
5406         PMD_INIT_FUNC_TRACE();
5407         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5408
5409         rxq = dev->data->rx_queues[rx_queue_id];
5410
5411         /* Allocate buffers for descriptor rings */
5412         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5413                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5414                              rx_queue_id);
5415                 return -1;
5416         }
5417         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5418         rxdctl |= IXGBE_RXDCTL_ENABLE;
5419         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5420
5421         /* Wait until RX Enable ready */
5422         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5423         do {
5424                 rte_delay_ms(1);
5425                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5426         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5427         if (!poll_ms)
5428                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5429         rte_wmb();
5430         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5431         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5432         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5433
5434         return 0;
5435 }
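/*
 * Illustrative usage sketch (not part of the driver; port_id, queue_id,
 * nb_desc, socket_id and mb_pool are assumed to exist): this function is
 * reached through the generic ethdev API when a queue was set up with
 * deferred start:
 *
 *     struct rte_eth_dev_info dev_info;
 *     struct rte_eth_rxconf rxconf;
 *
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     rxconf = dev_info.default_rxconf;
 *     rxconf.rx_deferred_start = 1;   // do not start in rte_eth_dev_start()
 *     rte_eth_rx_queue_setup(port_id, queue_id, nb_desc, socket_id,
 *                            &rxconf, mb_pool);
 *     rte_eth_dev_start(port_id);
 *     rte_eth_dev_rx_queue_start(port_id, queue_id);  // lands here via ops
 */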
5436
5437 /*
5438  * Stop Receive Units for the specified queue.
5439  */
5440 int __rte_cold
5441 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5442 {
5443         struct ixgbe_hw     *hw;
5444         struct ixgbe_adapter *adapter = dev->data->dev_private;
5445         struct ixgbe_rx_queue *rxq;
5446         uint32_t rxdctl;
5447         int poll_ms;
5448
5449         PMD_INIT_FUNC_TRACE();
5450         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5451
5452         rxq = dev->data->rx_queues[rx_queue_id];
5453
5454         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5455         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5456         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5457
5458         /* Wait until RX Enable bit clear */
5459         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5460         do {
5461                 rte_delay_ms(1);
5462                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5463         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5464         if (!poll_ms)
5465                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5466
5467         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5468
5469         ixgbe_rx_queue_release_mbufs(rxq);
5470         ixgbe_reset_rx_queue(adapter, rxq);
5471         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5472
5473         return 0;
5474 }
5475
5476
5477 /*
5478  * Start Transmit Units for the specified queue.
5479  */
5480 int __rte_cold
5481 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5482 {
5483         struct ixgbe_hw     *hw;
5484         struct ixgbe_tx_queue *txq;
5485         uint32_t txdctl;
5486         int poll_ms;
5487
5488         PMD_INIT_FUNC_TRACE();
5489         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5490
5491         txq = dev->data->tx_queues[tx_queue_id];
5492         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5493         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5494         txdctl |= IXGBE_TXDCTL_ENABLE;
5495         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5496
5497         /* Wait until TX Enable ready */
5498         if (hw->mac.type == ixgbe_mac_82599EB) {
5499                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5500                 do {
5501                         rte_delay_ms(1);
5502                         txdctl = IXGBE_READ_REG(hw,
5503                                 IXGBE_TXDCTL(txq->reg_idx));
5504                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5505                 if (!poll_ms)
5506                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5507                                 tx_queue_id);
5508         }
5509         rte_wmb();
5510         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5511         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5512
5513         return 0;
5514 }
5515
5516 /*
5517  * Stop Transmit Units for the specified queue.
5518  */
5519 int __rte_cold
5520 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5521 {
5522         struct ixgbe_hw     *hw;
5523         struct ixgbe_tx_queue *txq;
5524         uint32_t txdctl;
5525         uint32_t txtdh, txtdt;
5526         int poll_ms;
5527
5528         PMD_INIT_FUNC_TRACE();
5529         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5530
5531         txq = dev->data->tx_queues[tx_queue_id];
5532
5533         /* Wait until TX queue is empty */
5534         if (hw->mac.type == ixgbe_mac_82599EB) {
5535                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5536                 do {
5537                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5538                         txtdh = IXGBE_READ_REG(hw,
5539                                                IXGBE_TDH(txq->reg_idx));
5540                         txtdt = IXGBE_READ_REG(hw,
5541                                                IXGBE_TDT(txq->reg_idx));
5542                 } while (--poll_ms && (txtdh != txtdt));
5543                 if (!poll_ms)
5544                         PMD_INIT_LOG(ERR,
5545                                 "Tx Queue %d is not empty when stopping.",
5546                                 tx_queue_id);
5547         }
5548
5549         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5550         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5551         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5552
5553         /* Wait until TX Enable bit clear */
5554         if (hw->mac.type == ixgbe_mac_82599EB) {
5555                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5556                 do {
5557                         rte_delay_ms(1);
5558                         txdctl = IXGBE_READ_REG(hw,
5559                                                 IXGBE_TXDCTL(txq->reg_idx));
5560                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5561                 if (!poll_ms)
5562                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5563                                 tx_queue_id);
5564         }
5565
5566         if (txq->ops != NULL) {
5567                 txq->ops->release_mbufs(txq);
5568                 txq->ops->reset(txq);
5569         }
5570         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5571
5572         return 0;
5573 }
5574
5575 void
5576 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5577         struct rte_eth_rxq_info *qinfo)
5578 {
5579         struct ixgbe_rx_queue *rxq;
5580
5581         rxq = dev->data->rx_queues[queue_id];
5582
5583         qinfo->mp = rxq->mb_pool;
5584         qinfo->scattered_rx = dev->data->scattered_rx;
5585         qinfo->nb_desc = rxq->nb_rx_desc;
5586
5587         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5588         qinfo->conf.rx_drop_en = rxq->drop_en;
5589         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5590         qinfo->conf.offloads = rxq->offloads;
5591 }
5592
5593 void
5594 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5595         struct rte_eth_txq_info *qinfo)
5596 {
5597         struct ixgbe_tx_queue *txq;
5598
5599         txq = dev->data->tx_queues[queue_id];
5600
5601         qinfo->nb_desc = txq->nb_tx_desc;
5602
5603         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5604         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5605         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5606
5607         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5608         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5609         qinfo->conf.offloads = txq->offloads;
5610         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5611 }
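/*
 * Illustrative usage sketch (not part of the driver; port_id and queue_id
 * are assumed to exist): the two helpers above back the generic ethdev
 * queue-info API:
 *
 *     struct rte_eth_rxq_info rx_qinfo;
 *     struct rte_eth_txq_info tx_qinfo;
 *
 *     if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_qinfo) == 0)
 *         printf("Rx: %u descriptors, drop_en=%u\n",
 *                rx_qinfo.nb_desc, rx_qinfo.conf.rx_drop_en);
 *     if (rte_eth_tx_queue_info_get(port_id, queue_id, &tx_qinfo) == 0)
 *         printf("Tx: %u descriptors, tx_rs_thresh=%u\n",
 *                tx_qinfo.nb_desc, tx_qinfo.conf.tx_rs_thresh);
 */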
5612
5613 /*
5614  * [VF] Initializes Receive Unit.
5615  */
5616 int __rte_cold
5617 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5618 {
5619         struct ixgbe_hw     *hw;
5620         struct ixgbe_rx_queue *rxq;
5621         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5622         uint64_t bus_addr;
5623         uint32_t srrctl, psrtype = 0;
5624         uint16_t buf_size;
5625         uint16_t i;
5626         int ret;
5627
5628         PMD_INIT_FUNC_TRACE();
5629         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5630
5631         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5632                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5633                         "it must be a power of 2");
5634                 return -1;
5635         }
5636
5637         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5638                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5639                         "it must be less than or equal to %d",
5640                         hw->mac.max_rx_queues);
5641                 return -1;
5642         }
5643
5644         /*
5645          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5646          * disables VF packet reception if the PF MTU is > 1500.
5647          * This is done to deal with an 82599 limitation that forces
5648          * the PF and all VFs to share the same MTU.
5649          * The PF driver re-enables VF packet reception once the VF driver
5650          * issues an IXGBE_VF_SET_LPE request.
5651          * In the meantime, the VF device cannot be used, even if the VF driver
5652          * and the Guest VM network stack are ready to accept packets with a
5653          * size up to the PF MTU.
5654          * As a workaround for this PF behaviour, always call
5655          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5656          * VF packet reception works in all cases.
5657          */
5658         if (ixgbevf_rlpml_set_vf(hw,
5659             (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) {
5660                 PMD_INIT_LOG(ERR, "Failed to set max packet length to %d.",
5661                              dev->data->dev_conf.rxmode.max_rx_pkt_len);
5662                 return -EINVAL;
5663         }
5664
5665         /*
5666          * Assume no header split and no VLAN strip support
5667          * on any Rx queue first.
5668          */
5669         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5670         /* Setup RX queues */
5671         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5672                 rxq = dev->data->rx_queues[i];
5673
5674                 /* Allocate buffers for descriptor rings */
5675                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5676                 if (ret)
5677                         return ret;
5678
5679                 /* Setup the Base and Length of the Rx Descriptor Rings */
5680                 bus_addr = rxq->rx_ring_phys_addr;
5681
5682                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5683                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5684                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5685                                 (uint32_t)(bus_addr >> 32));
5686                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5687                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5688                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5689                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5690
5691
5692                 /* Configure the SRRCTL register */
5693                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5694
5695                 /* Drop packets when no Rx descriptors are available, if requested */
5696                 if (rxq->drop_en)
5697                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5698
5699                 /*
5700                  * Configure the RX buffer size in the BSIZEPACKET field of
5701                  * the SRRCTL register of the queue.
5702                  * The value is in 1 KB resolution. Valid values can be from
5703                  * 1 KB to 16 KB.
5704                  */
5705                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5706                         RTE_PKTMBUF_HEADROOM);
5707                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5708                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5709
5710                 /*
5711                  * VF modification: write the virtual function SRRCTL register
5712                  */
5713                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5714
5715                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5716                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5717
5718                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5719                     /* Account for dual VLAN (QinQ) tag length when checking buf_size */
5720                     (rxmode->max_rx_pkt_len +
5721                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5722                         if (!dev->data->scattered_rx)
5723                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5724                         dev->data->scattered_rx = 1;
5725                 }
5726
5727                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5728                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5729         }
5730
5731         /* Set RQPL (Rx queues per pool) for VF RSS from the Rx queue count */
5732         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5733                 IXGBE_PSRTYPE_RQPL_SHIFT;
5734         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
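        /*
         * Worked example of the RQPL encoding above (a sketch; on 82599-class
         * VFs the field holds the per-pool Rx queue count as a power of two,
         * and a power-of-2 queue count was enforced earlier):
         *
         *     nb_rx_queues = 1  ->  (1 >> 1) = 0  ->  2^0 = 1 queue
         *     nb_rx_queues = 2  ->  (2 >> 1) = 1  ->  2^1 = 2 queues
         *     nb_rx_queues = 4  ->  (4 >> 1) = 2  ->  2^2 = 4 queues
         */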
5735
5736         ixgbe_set_rx_function(dev);
5737
5738         return 0;
5739 }
5740
5741 /*
5742  * [VF] Initializes Transmit Unit.
5743  */
5744 void __rte_cold
5745 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5746 {
5747         struct ixgbe_hw     *hw;
5748         struct ixgbe_tx_queue *txq;
5749         uint64_t bus_addr;
5750         uint32_t txctrl;
5751         uint16_t i;
5752
5753         PMD_INIT_FUNC_TRACE();
5754         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5755
5756         /* Setup the Base and Length of the Tx Descriptor Rings */
5757         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5758                 txq = dev->data->tx_queues[i];
5759                 bus_addr = txq->tx_ring_phys_addr;
5760                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5761                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5762                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5763                                 (uint32_t)(bus_addr >> 32));
5764                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5765                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5766                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5767                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5768                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5769
5770                 /*
5771                  * Disable the Tx Head Writeback relaxed-ordering (RO) bit,
5772                  * since out-of-order writeback breaks the descriptor bookkeeping.
5773                  */
5774                 txctrl = IXGBE_READ_REG(hw,
5775                                 IXGBE_VFDCA_TXCTRL(i));
5776                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5777                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5778                                 txctrl);
5779         }
5780 }
5781
5782 /*
5783  * [VF] Start Transmit and Receive Units.
5784  */
5785 void __rte_cold
5786 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5787 {
5788         struct ixgbe_hw     *hw;
5789         struct ixgbe_tx_queue *txq;
5790         struct ixgbe_rx_queue *rxq;
5791         uint32_t txdctl;
5792         uint32_t rxdctl;
5793         uint16_t i;
5794         int poll_ms;
5795
5796         PMD_INIT_FUNC_TRACE();
5797         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5798
5799         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5800                 txq = dev->data->tx_queues[i];
5801                 /* Setup Transmit Threshold Registers */
5802                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5803                 txdctl |= txq->pthresh & 0x7F;
5804                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5805                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5806                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5807         }
5808
5809         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5810
5811                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5812                 txdctl |= IXGBE_TXDCTL_ENABLE;
5813                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5814
5815                 poll_ms = 10;
5816                 /* Wait until TX Enable ready */
5817                 do {
5818                         rte_delay_ms(1);
5819                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5820                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5821                 if (!poll_ms)
5822                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5823         }
5824         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5825
5826                 rxq = dev->data->rx_queues[i];
5827
5828                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5829                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5830                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5831
5832                 /* Wait until RX Enable ready */
5833                 poll_ms = 10;
5834                 do {
5835                         rte_delay_ms(1);
5836                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5837                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5838                 if (!poll_ms)
5839                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5840                 rte_wmb();
5841                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5842
5843         }
5844 }
5845
5846 int
5847 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5848                     const struct rte_flow_action_rss *in)
5849 {
5850         if (in->key_len > RTE_DIM(out->key) ||
5851             in->queue_num > RTE_DIM(out->queue))
5852                 return -EINVAL;
5853         out->conf = (struct rte_flow_action_rss){
5854                 .func = in->func,
5855                 .level = in->level,
5856                 .types = in->types,
5857                 .key_len = in->key_len,
5858                 .queue_num = in->queue_num,
5859                 .key = memcpy(out->key, in->key, in->key_len),
5860                 .queue = memcpy(out->queue, in->queue,
5861                                 sizeof(*in->queue) * in->queue_num),
5862         };
5863         return 0;
5864 }
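/*
 * Illustrative usage sketch (not part of the driver; the key and queue values
 * are arbitrary example data): ixgbe_rss_conf_init() deep-copies a flow-API
 * RSS action into driver-owned storage so the caller's buffers may go away:
 *
 *     static uint8_t rss_key[40];            // all-zero key, example only
 *     static uint16_t rss_queues[] = { 0, 1 };
 *     struct ixgbe_rte_flow_rss_conf copy;
 *     struct rte_flow_action_rss rss = {
 *             .types = ETH_RSS_IP,
 *             .key_len = sizeof(rss_key),
 *             .queue_num = RTE_DIM(rss_queues),
 *             .key = rss_key,
 *             .queue = rss_queues,
 *     };
 *
 *     if (ixgbe_rss_conf_init(&copy, &rss) != 0)
 *             PMD_DRV_LOG(ERR, "RSS action does not fit");
 */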
5865
5866 int
5867 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5868                       const struct rte_flow_action_rss *with)
5869 {
5870         return (comp->func == with->func &&
5871                 comp->level == with->level &&
5872                 comp->types == with->types &&
5873                 comp->key_len == with->key_len &&
5874                 comp->queue_num == with->queue_num &&
5875                 !memcmp(comp->key, with->key, with->key_len) &&
5876                 !memcmp(comp->queue, with->queue,
5877                         sizeof(*with->queue) * with->queue_num));
5878 }
5879
5880 int
5881 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5882                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5883 {
5884         struct ixgbe_hw *hw;
5885         uint32_t reta;
5886         uint16_t i;
5887         uint16_t j;
5888         uint16_t sp_reta_size;
5889         uint32_t reta_reg;
5890         struct rte_eth_rss_conf rss_conf = {
5891                 .rss_key = conf->conf.key_len ?
5892                         (void *)(uintptr_t)conf->conf.key : NULL,
5893                 .rss_key_len = conf->conf.key_len,
5894                 .rss_hf = conf->conf.types,
5895         };
5896         struct ixgbe_filter_info *filter_info =
5897                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5898
5899         PMD_INIT_FUNC_TRACE();
5900         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5901
5902         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5903
5904         if (!add) {
5905                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5906                                           &conf->conf)) {
5907                         ixgbe_rss_disable(dev);
5908                         memset(&filter_info->rss_info, 0,
5909                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5910                         return 0;
5911                 }
5912                 return -EINVAL;
5913         }
5914
5915         if (filter_info->rss_info.conf.queue_num)
5916                 return -EINVAL;
5917         /* Fill in the redirection table.
5918          * The byte-swap is needed because NIC registers are in
5919          * little-endian order.
5920          */
5921         reta = 0;
5922         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5923                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5924
5925                 if (j == conf->conf.queue_num)
5926                         j = 0;
5927                 reta = (reta << 8) | conf->conf.queue[j];
5928                 if ((i & 3) == 3)
5929                         IXGBE_WRITE_REG(hw, reta_reg,
5930                                         rte_bswap32(reta));
5931         }
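        /*
         * Worked example of the RETA packing above (a sketch with an example
         * queue list of { 2, 3 }): four consecutive 8-bit entries are
         * accumulated most-significant-byte first,
         *
         *     reta = 0x02030203
         *
         * and rte_bswap32() turns this into 0x03020302 before the write, so
         * that byte 0 of the little-endian register holds the first entry
         * (queue 2), byte 1 the second (queue 3), and so on.
         */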
5932
5933         /* Configure the RSS key and the RSS protocols used to compute
5934          * the RSS hash of input packets.
5935          */
5936         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5937                 ixgbe_rss_disable(dev);
5938                 return 0;
5939         }
5940         if (rss_conf.rss_key == NULL)
5941                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5942         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5943
5944         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5945                 return -EINVAL;
5946
5947         return 0;
5948 }
5949
5950 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5951 #if defined(RTE_ARCH_PPC_64)
5952 int
5953 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5954 {
5955         return -1;
5956 }
5957
5958 uint16_t
5959 ixgbe_recv_pkts_vec(
5960         void __rte_unused *rx_queue,
5961         struct rte_mbuf __rte_unused **rx_pkts,
5962         uint16_t __rte_unused nb_pkts)
5963 {
5964         return 0;
5965 }
5966
5967 uint16_t
5968 ixgbe_recv_scattered_pkts_vec(
5969         void __rte_unused *rx_queue,
5970         struct rte_mbuf __rte_unused **rx_pkts,
5971         uint16_t __rte_unused nb_pkts)
5972 {
5973         return 0;
5974 }
5975
5976 int
5977 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5978 {
5979         return -1;
5980 }
5981
5982 uint16_t
5983 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5984                 struct rte_mbuf __rte_unused **tx_pkts,
5985                 uint16_t __rte_unused nb_pkts)
5986 {
5987         return 0;
5988 }
5989
5990 int
5991 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5992 {
5993         return -1;
5994 }
5995
5996 void
5997 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5998 {
5999         return;
6000 }
6001 #endif