1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit mask to indicate which bits are required for building the TX context */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_OUTER_IPV6 |              \
62                 PKT_TX_OUTER_IPV4 |              \
63                 PKT_TX_IPV6 |                    \
64                 PKT_TX_IPV4 |                    \
65                 PKT_TX_VLAN_PKT |                \
66                 PKT_TX_IP_CKSUM |                \
67                 PKT_TX_L4_MASK |                 \
68                 PKT_TX_TCP_SEG |                 \
69                 PKT_TX_MACSEC |                  \
70                 PKT_TX_OUTER_IP_CKSUM |          \
71                 PKT_TX_SEC_OFFLOAD |     \
72                 IXGBE_TX_IEEE1588_TMST)
73
74 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
75                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
76
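/*
 * For illustration: IXGBE_TX_OFFLOAD_NOTSUP_MASK is simply the complement of
 * IXGBE_TX_OFFLOAD_MASK within PKT_TX_OFFLOAD_MASK, so a packet requesting
 * any offload this PMD does not support can be rejected with a single AND,
 * as ixgbe_prep_pkts() does further below:
 *
 *     if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
 *             rte_errno = ENOTSUP;
 *             return i;
 *     }
 */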
77 #if 1
78 #define RTE_PMD_USE_PREFETCH
79 #endif
80
81 #ifdef RTE_PMD_USE_PREFETCH
82 /*
83  * Prefetch a cache line into all cache levels.
84  */
85 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
86 #else
87 #define rte_ixgbe_prefetch(p)   do {} while (0)
88 #endif
89
90 /*********************************************************************
91  *
92  *  TX functions
93  *
94  **********************************************************************/
95
96 /*
97  * Check for descriptors with their DD bit set and free mbufs.
98  * Return the total number of buffers freed.
99  */
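/*
 * For illustration (assumed values): with nb_tx_desc = 128 and
 * tx_rs_thresh = 32, tx_next_dd starts at 31, so the first successful call
 * frees sw_ring[0..31] once descriptor 31 reports DD, and tx_next_dd then
 * advances 31 -> 63 -> 95 -> 127 before wrapping back to 31.
 */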
100 static __rte_always_inline int
101 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
102 {
103         struct ixgbe_tx_entry *txep;
104         uint32_t status;
105         int i, nb_free = 0;
106         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
107
108         /* check DD bit on threshold descriptor */
109         status = txq->tx_ring[txq->tx_next_dd].wb.status;
110         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
111                 return 0;
112
113         /*
114          * first buffer to free from S/W ring is at index
115          * tx_next_dd - (tx_rs_thresh-1)
116          */
117         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
118
119         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
120                 /* free buffers one at a time */
121                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
122                 txep->mbuf = NULL;
123
124                 if (unlikely(m == NULL))
125                         continue;
126
127                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
128                     (nb_free > 0 && m->pool != free[0]->pool)) {
129                         rte_mempool_put_bulk(free[0]->pool,
130                                              (void **)free, nb_free);
131                         nb_free = 0;
132                 }
133
134                 free[nb_free++] = m;
135         }
136
137         if (nb_free > 0)
138                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
139
140         /* buffers were freed, update counters */
141         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
142         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
143         if (txq->tx_next_dd >= txq->nb_tx_desc)
144                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
145
146         return txq->tx_rs_thresh;
147 }
148
149 /* Populate 4 descriptors with data from 4 mbufs */
150 static inline void
151 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
152 {
153         uint64_t buf_dma_addr;
154         uint32_t pkt_len;
155         int i;
156
157         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
158                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
159                 pkt_len = (*pkts)->data_len;
160
161                 /* write data to descriptor */
162                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
163
164                 txdp->read.cmd_type_len =
165                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
166
167                 txdp->read.olinfo_status =
168                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
169
170                 rte_prefetch0(&(*pkts)->pool);
171         }
172 }
173
174 /* Populate 1 descriptor with data from 1 mbuf */
175 static inline void
176 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
177 {
178         uint64_t buf_dma_addr;
179         uint32_t pkt_len;
180
181         buf_dma_addr = rte_mbuf_data_iova(*pkts);
182         pkt_len = (*pkts)->data_len;
183
184         /* write data to descriptor */
185         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
186         txdp->read.cmd_type_len =
187                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
188         txdp->read.olinfo_status =
189                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
190         rte_prefetch0(&(*pkts)->pool);
191 }
192
193 /*
194  * Fill H/W descriptor ring with mbuf data.
195  * Copy mbuf pointers to the S/W ring.
196  */
197 static inline void
198 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
199                       uint16_t nb_pkts)
200 {
201         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
202         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
203         const int N_PER_LOOP = 4;
204         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
205         int mainpart, leftover;
206         int i, j;
207
208         /*
209          * Process most of the packets in chunks of N pkts.  Any
210          * leftover packets will get processed one at a time.
211          */
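        /*
         * For illustration: with nb_pkts = 11 and N_PER_LOOP = 4,
         * mainpart = 8 and leftover = 3, i.e. two tx4() calls followed by
         * three tx1() calls.
         */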
212         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
213         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
214         for (i = 0; i < mainpart; i += N_PER_LOOP) {
215                 /* Copy N mbuf pointers to the S/W ring */
216                 for (j = 0; j < N_PER_LOOP; ++j) {
217                         (txep + i + j)->mbuf = *(pkts + i + j);
218                 }
219                 tx4(txdp + i, pkts + i);
220         }
221
222         if (unlikely(leftover > 0)) {
223                 for (i = 0; i < leftover; ++i) {
224                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
225                         tx1(txdp + mainpart + i, pkts + mainpart + i);
226                 }
227         }
228 }
229
230 static inline uint16_t
231 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
232              uint16_t nb_pkts)
233 {
234         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
235         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
236         uint16_t n = 0;
237
238         /*
239          * Begin scanning the H/W ring for done descriptors when the
240          * number of available descriptors drops below tx_free_thresh.  For
241          * each done descriptor, free the associated buffer.
242          */
243         if (txq->nb_tx_free < txq->tx_free_thresh)
244                 ixgbe_tx_free_bufs(txq);
245
246         /* Only use descriptors that are available */
247         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
248         if (unlikely(nb_pkts == 0))
249                 return 0;
250
251         /* Use exactly nb_pkts descriptors */
252         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
253
254         /*
255          * At this point, we know there are enough descriptors in the
256          * ring to transmit all the packets.  This assumes that each
257          * mbuf contains a single segment, and that no new offloads
258          * are expected, which would require a new context descriptor.
259          */
260
261         /*
262          * See if we're going to wrap around. If so, handle the top
263          * of the descriptor ring first, then do the bottom.  If not,
264          * the processing looks just like the "bottom" part anyway...
265          */
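        /*
         * For illustration (assumed values): with nb_tx_desc = 512,
         * tx_tail = 500 and nb_pkts = 30, the first fill writes the 12
         * descriptors at the top of the ring (500..511) and the second
         * fill writes the remaining 18 starting at index 0.
         */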
266         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
267                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
268                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
269
270                 /*
271                  * We know that the last descriptor in the ring will need to
272                  * have its RS bit set because tx_rs_thresh has to be
273                  * a divisor of the ring size
274                  */
275                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
276                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
277                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
278
279                 txq->tx_tail = 0;
280         }
281
282         /* Fill H/W descriptor ring with mbuf data */
283         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
284         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
285
286         /*
287          * Determine if RS bit should be set
288          * This is what we actually want:
289          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
290          * but instead of subtracting 1 and doing >=, we can just do
291          * greater than without subtracting.
292          */
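        /*
         * For illustration (assumed values): with tx_rs_thresh = 32,
         * tx_next_rs steps through 31, 63, 95, ...  If tx_tail has just
         * advanced from 20 to 40, descriptor 31 gets the RS bit set and
         * tx_next_rs moves on to 63.
         */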
293         if (txq->tx_tail > txq->tx_next_rs) {
294                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
295                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
296                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
297                                                 txq->tx_rs_thresh);
298                 if (txq->tx_next_rs >= txq->nb_tx_desc)
299                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
300         }
301
302         /*
303          * Check for wrap-around. This would only happen if we used
304          * up to the last descriptor in the ring, no more, no less.
305          */
306         if (txq->tx_tail >= txq->nb_tx_desc)
307                 txq->tx_tail = 0;
308
309         /* update tail pointer */
310         rte_wmb();
311         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
312
313         return nb_pkts;
314 }
315
316 uint16_t
317 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
318                        uint16_t nb_pkts)
319 {
320         uint16_t nb_tx;
321
322         /* Transmit the burst directly if it fits within TX_MAX_BURST pkts */
323         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
324                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
325
326         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
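        /*
         * For illustration: assuming RTE_PMD_IXGBE_TX_MAX_BURST is 32, a
         * request for 80 packets is submitted as chunks of 32, 32 and 16,
         * stopping early if tx_xmit_pkts() accepts fewer packets than
         * requested for a chunk.
         */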
327         nb_tx = 0;
328         while (nb_pkts) {
329                 uint16_t ret, n;
330
331                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
332                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
333                 nb_tx = (uint16_t)(nb_tx + ret);
334                 nb_pkts = (uint16_t)(nb_pkts - ret);
335                 if (ret < n)
336                         break;
337         }
338
339         return nb_tx;
340 }
341
342 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
343 static uint16_t
344 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
345                     uint16_t nb_pkts)
346 {
347         uint16_t nb_tx = 0;
348         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
349
350         while (nb_pkts) {
351                 uint16_t ret, num;
352
353                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
354                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
355                                                  num);
356                 nb_tx += ret;
357                 nb_pkts -= ret;
358                 if (ret < num)
359                         break;
360         }
361
362         return nb_tx;
363 }
364 #endif
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN_PKT) {
388                 tx_offload_mask.vlan_tci |= ~0;
389         }
390
391         /* check if TCP segmentation is required for this packet */
392         if (ol_flags & PKT_TX_TCP_SEG) {
393                 /* implies IP cksum in IPv4 */
394                 if (ol_flags & PKT_TX_IP_CKSUM)
395                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
398                 else
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
402
403                 tx_offload_mask.l2_len |= ~0;
404                 tx_offload_mask.l3_len |= ~0;
405                 tx_offload_mask.l4_len |= ~0;
406                 tx_offload_mask.tso_segsz |= ~0;
407                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409         } else { /* no TSO, check if hardware checksum is needed */
410                 if (ol_flags & PKT_TX_IP_CKSUM) {
411                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                 }
415
416                 switch (ol_flags & PKT_TX_L4_MASK) {
417                 case PKT_TX_UDP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
430                         tx_offload_mask.l2_len |= ~0;
431                         tx_offload_mask.l3_len |= ~0;
432                         break;
433                 case PKT_TX_SCTP_CKSUM:
434                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
438                         tx_offload_mask.l2_len |= ~0;
439                         tx_offload_mask.l3_len |= ~0;
440                         break;
441                 default:
442                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
444                         break;
445                 }
446         }
447
448         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449                 tx_offload_mask.outer_l2_len |= ~0;
450                 tx_offload_mask.outer_l3_len |= ~0;
451                 tx_offload_mask.l2_len |= ~0;
452                 seqnum_seed |= tx_offload.outer_l3_len
453                                << IXGBE_ADVTXD_OUTER_IPLEN;
454                 seqnum_seed |= tx_offload.l2_len
455                                << IXGBE_ADVTXD_TUNNEL_LEN;
456         }
457 #ifdef RTE_LIBRTE_SECURITY
458         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459                 union ixgbe_crypto_tx_desc_md *md =
460                                 (union ixgbe_crypto_tx_desc_md *)mdata;
461                 seqnum_seed |=
462                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463                 type_tucmd_mlhl |= md->enc ?
464                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
466                 type_tucmd_mlhl |=
467                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468                 tx_offload_mask.sa_idx |= ~0;
469                 tx_offload_mask.sec_pad_len |= ~0;
470         }
471 #endif
472
473         txq->ctx_cache[ctx_idx].flags = ol_flags;
474         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
475                 tx_offload_mask.data[0] & tx_offload.data[0];
476         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
477                 tx_offload_mask.data[1] & tx_offload.data[1];
478         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
479
480         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481         vlan_macip_lens = tx_offload.l3_len;
482         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484                                     IXGBE_ADVTXD_MACLEN_SHIFT);
485         else
486                 vlan_macip_lens |= (tx_offload.l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
491         ctx_txd->seqnum_seed     = seqnum_seed;
492 }
493
494 /*
495  * Check which hardware context can be used. Use the existing match
496  * or create a new context descriptor.
497  */
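/*
 * For illustration: the queue caches IXGBE_CTX_NUM (two) contexts. Two
 * interleaved flows with different offload layouts can therefore each keep
 * hitting their own cached slot, while a third, different layout returns
 * IXGBE_CTX_NUM and forces a new context descriptor to be built.
 */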
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500                    union ixgbe_tx_offload tx_offload)
501 {
502         /* If it matches the currently used context */
503         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506                      & tx_offload.data[0])) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509                      & tx_offload.data[1]))))
510                 return txq->ctx_curr;
511
512         /* Otherwise, check whether it matches the other cached context */
513         txq->ctx_curr ^= 1;
514         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517                      & tx_offload.data[0])) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520                      & tx_offload.data[1]))))
521                 return txq->ctx_curr;
522
523         /* Neither cached context matches; a new context descriptor is needed */
524         return IXGBE_CTX_NUM;
525 }
526
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
529 {
530         uint32_t tmp = 0;
531
532         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534         if (ol_flags & PKT_TX_IP_CKSUM)
535                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
536         if (ol_flags & PKT_TX_TCP_SEG)
537                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
538         return tmp;
539 }
540
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
543 {
544         uint32_t cmdtype = 0;
545
546         if (ol_flags & PKT_TX_VLAN_PKT)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548         if (ol_flags & PKT_TX_TCP_SEG)
549                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552         if (ol_flags & PKT_TX_MACSEC)
553                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
554         return cmdtype;
555 }
556
557 /* Default RS bit threshold values */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH   32
560 #endif
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
563 #endif
564
565 /* Reset transmit descriptors after they have been used */
566 static inline int
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
568 {
569         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572         uint16_t nb_tx_desc = txq->nb_tx_desc;
573         uint16_t desc_to_clean_to;
574         uint16_t nb_tx_to_clean;
575         uint32_t status;
576
577         /* Determine the last descriptor needing to be cleaned */
578         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579         if (desc_to_clean_to >= nb_tx_desc)
580                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
581
582         /* Check to make sure the last descriptor to clean is done */
583         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584         status = txr[desc_to_clean_to].wb.status;
585         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
586                 PMD_TX_FREE_LOG(DEBUG,
587                                 "TX descriptor %4u is not done "
588                                 "(port=%d queue=%d)",
589                                 desc_to_clean_to,
590                                 txq->port_id, txq->queue_id);
591                 /* Failed to clean any descriptors, better luck next time */
592                 return -(1);
593         }
594
595         /* Figure out how many descriptors will be cleaned */
596         if (last_desc_cleaned > desc_to_clean_to)
597                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
598                                                         desc_to_clean_to);
599         else
600                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
601                                                 last_desc_cleaned);
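        /*
         * For illustration (assumed values, single-segment packets): with
         * nb_tx_desc = 512, tx_rs_thresh = 32 and last_desc_cleaned = 499,
         * desc_to_clean_to wraps to 19, so nb_tx_to_clean =
         * (512 - 499) + 19 = 32 descriptors are accounted as cleaned.
         */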
602
603         PMD_TX_FREE_LOG(DEBUG,
604                         "Cleaning %4u TX descriptors: %4u to %4u "
605                         "(port=%d queue=%d)",
606                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607                         txq->port_id, txq->queue_id);
608
609         /*
610          * The last descriptor to clean is done, so that means all the
611          * descriptors from the last descriptor that was cleaned
612          * up to the last descriptor with the RS bit set
613          * are done. Only reset the threshold descriptor.
614          */
615         txr[desc_to_clean_to].wb.status = 0;
616
617         /* Update the txq to reflect the last descriptor that was cleaned */
618         txq->last_desc_cleaned = desc_to_clean_to;
619         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
620
621         /* No Error */
622         return 0;
623 }
624
625 uint16_t
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
627                 uint16_t nb_pkts)
628 {
629         struct ixgbe_tx_queue *txq;
630         struct ixgbe_tx_entry *sw_ring;
631         struct ixgbe_tx_entry *txe, *txn;
632         volatile union ixgbe_adv_tx_desc *txr;
633         volatile union ixgbe_adv_tx_desc *txd, *txp;
634         struct rte_mbuf     *tx_pkt;
635         struct rte_mbuf     *m_seg;
636         uint64_t buf_dma_addr;
637         uint32_t olinfo_status;
638         uint32_t cmd_type_len;
639         uint32_t pkt_len;
640         uint16_t slen;
641         uint64_t ol_flags;
642         uint16_t tx_id;
643         uint16_t tx_last;
644         uint16_t nb_tx;
645         uint16_t nb_used;
646         uint64_t tx_ol_req;
647         uint32_t ctx = 0;
648         uint32_t new_ctx;
649         union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIBRTE_SECURITY
651         uint8_t use_ipsec;
652 #endif
653
654         tx_offload.data[0] = 0;
655         tx_offload.data[1] = 0;
656         txq = tx_queue;
657         sw_ring = txq->sw_ring;
658         txr     = txq->tx_ring;
659         tx_id   = txq->tx_tail;
660         txe = &sw_ring[tx_id];
661         txp = NULL;
662
663         /* Determine if the descriptor ring needs to be cleaned. */
664         if (txq->nb_tx_free < txq->tx_free_thresh)
665                 ixgbe_xmit_cleanup(txq);
666
667         rte_prefetch0(&txe->mbuf->pool);
668
669         /* TX loop */
670         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
671                 new_ctx = 0;
672                 tx_pkt = *tx_pkts++;
673                 pkt_len = tx_pkt->pkt_len;
674
675                 /*
676                  * Determine how many (if any) context descriptors
677                  * are needed for offload functionality.
678                  */
679                 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIBRTE_SECURITY
681                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
682 #endif
683
684                 /* If hardware offload required */
685                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
686                 if (tx_ol_req) {
687                         tx_offload.l2_len = tx_pkt->l2_len;
688                         tx_offload.l3_len = tx_pkt->l3_len;
689                         tx_offload.l4_len = tx_pkt->l4_len;
690                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
691                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
692                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIBRTE_SECURITY
695                         if (use_ipsec) {
696                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697                                         (union ixgbe_crypto_tx_desc_md *)
698                                                         &tx_pkt->udata64;
699                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
701                         }
702 #endif
703
704                         /* Decide whether a new context must be built or an existing one reused. */
705                         ctx = what_advctx_update(txq, tx_ol_req,
706                                 tx_offload);
707                         /* Only allocate a context descriptor if required */
708                         new_ctx = (ctx == IXGBE_CTX_NUM);
709                         ctx = txq->ctx_curr;
710                 }
711
712                 /*
713                  * Keep track of how many descriptors are used in this loop.
714                  * This will always be the number of segments plus the number
715                  * of context descriptors required to transmit the packet.
716                  */
717                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
718
719                 if (txp != NULL &&
720                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721                         /* set RS on the previous packet in the burst */
722                         txp->read.cmd_type_len |=
723                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
724
725                 /*
726                  * The number of descriptors that must be allocated for a
727                  * packet is the number of segments of that packet, plus 1
728                  * Context Descriptor for the hardware offload, if any.
729                  * Determine the last TX descriptor to allocate in the TX ring
730                  * for the packet, starting from the current position (tx_id)
731                  * in the ring.
732                  */
733                 tx_last = (uint16_t) (tx_id + nb_used - 1);
734
735                 /* Circular ring */
736                 if (tx_last >= txq->nb_tx_desc)
737                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
738
739                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740                            " tx_first=%u tx_last=%u",
741                            (unsigned) txq->port_id,
742                            (unsigned) txq->queue_id,
743                            (unsigned) pkt_len,
744                            (unsigned) tx_id,
745                            (unsigned) tx_last);
746
747                 /*
748                  * Make sure there are enough TX descriptors available to
749                  * transmit the entire packet.
750                  * nb_used better be less than or equal to txq->tx_rs_thresh
751                  */
752                 if (nb_used > txq->nb_tx_free) {
753                         PMD_TX_FREE_LOG(DEBUG,
754                                         "Not enough free TX descriptors "
755                                         "nb_used=%4u nb_free=%4u "
756                                         "(port=%d queue=%d)",
757                                         nb_used, txq->nb_tx_free,
758                                         txq->port_id, txq->queue_id);
759
760                         if (ixgbe_xmit_cleanup(txq) != 0) {
761                                 /* Could not clean any descriptors */
762                                 if (nb_tx == 0)
763                                         return 0;
764                                 goto end_of_tx;
765                         }
766
767                         /* nb_used better be <= txq->tx_rs_thresh */
768                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
769                                 PMD_TX_FREE_LOG(DEBUG,
770                                         "The number of descriptors needed to "
771                                         "transmit the packet exceeds the "
772                                         "RS bit threshold. This will impact "
773                                         "performance. "
774                                         "nb_used=%4u nb_free=%4u "
775                                         "tx_rs_thresh=%4u. "
776                                         "(port=%d queue=%d)",
777                                         nb_used, txq->nb_tx_free,
778                                         txq->tx_rs_thresh,
779                                         txq->port_id, txq->queue_id);
780                                 /*
781                                  * Loop here until there are enough TX
782                                  * descriptors or until the ring cannot be
783                                  * cleaned.
784                                  */
785                                 while (nb_used > txq->nb_tx_free) {
786                                         if (ixgbe_xmit_cleanup(txq) != 0) {
787                                                 /*
788                                                  * Could not clean any
789                                                  * descriptors
790                                                  */
791                                                 if (nb_tx == 0)
792                                                         return 0;
793                                                 goto end_of_tx;
794                                         }
795                                 }
796                         }
797                 }
798
799                 /*
800                  * By now there are enough free TX descriptors to transmit
801                  * the packet.
802                  */
803
804                 /*
805                  * Set common flags of all TX Data Descriptors.
806                  *
807                  * The following bits must be set in all Data Descriptors:
808                  *   - IXGBE_ADVTXD_DTYP_DATA
809                  *   - IXGBE_ADVTXD_DCMD_DEXT
810                  *
811                  * The following bits must be set in the first Data Descriptor
812                  * and are ignored in the other ones:
813                  *   - IXGBE_ADVTXD_DCMD_IFCS
814                  *   - IXGBE_ADVTXD_MAC_1588
815                  *   - IXGBE_ADVTXD_DCMD_VLE
816                  *
817                  * The following bits must only be set in the last Data
818                  * Descriptor:
819                  *   - IXGBE_TXD_CMD_EOP
820                  *
821                  * The following bits can be set in any Data Descriptor, but
822                  * are only set in the last Data Descriptor:
823                  *   - IXGBE_TXD_CMD_RS
824                  */
825                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
827
828 #ifdef RTE_LIBRTE_IEEE1588
829                 if (ol_flags & PKT_TX_IEEE1588_TMST)
830                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
831 #endif
832
833                 olinfo_status = 0;
834                 if (tx_ol_req) {
835
836                         if (ol_flags & PKT_TX_TCP_SEG) {
837                                 /* when TSO is on, paylen in the descriptor is
838                                  * not the packet len but the TCP payload len */
839                                 pkt_len -= (tx_offload.l2_len +
840                                         tx_offload.l3_len + tx_offload.l4_len);
841                         }
842
843                         /*
844                          * Setup the TX Advanced Context Descriptor if required
845                          */
846                         if (new_ctx) {
847                                 volatile struct ixgbe_adv_tx_context_desc *
848                                     ctx_txd;
849
850                                 ctx_txd = (volatile struct
851                                     ixgbe_adv_tx_context_desc *)
852                                     &txr[tx_id];
853
854                                 txn = &sw_ring[txe->next_id];
855                                 rte_prefetch0(&txn->mbuf->pool);
856
857                                 if (txe->mbuf != NULL) {
858                                         rte_pktmbuf_free_seg(txe->mbuf);
859                                         txe->mbuf = NULL;
860                                 }
861
862                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
863                                         tx_offload, &tx_pkt->udata64);
864
865                                 txe->last_id = tx_last;
866                                 tx_id = txe->next_id;
867                                 txe = txn;
868                         }
869
870                         /*
871                          * Set up the TX Advanced Data Descriptor.
872                          * This path is taken whether a new context
873                          * descriptor was built or an existing one is reused.
874                          */
875                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
876                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
877                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
878                 }
879
880                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
881 #ifdef RTE_LIBRTE_SECURITY
882                 if (use_ipsec)
883                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
884 #endif
885
886                 m_seg = tx_pkt;
887                 do {
888                         txd = &txr[tx_id];
889                         txn = &sw_ring[txe->next_id];
890                         rte_prefetch0(&txn->mbuf->pool);
891
892                         if (txe->mbuf != NULL)
893                                 rte_pktmbuf_free_seg(txe->mbuf);
894                         txe->mbuf = m_seg;
895
896                         /*
897                          * Set up Transmit Data Descriptor.
898                          */
899                         slen = m_seg->data_len;
900                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
901                         txd->read.buffer_addr =
902                                 rte_cpu_to_le_64(buf_dma_addr);
903                         txd->read.cmd_type_len =
904                                 rte_cpu_to_le_32(cmd_type_len | slen);
905                         txd->read.olinfo_status =
906                                 rte_cpu_to_le_32(olinfo_status);
907                         txe->last_id = tx_last;
908                         tx_id = txe->next_id;
909                         txe = txn;
910                         m_seg = m_seg->next;
911                 } while (m_seg != NULL);
912
913                 /*
914                  * The last packet data descriptor needs End Of Packet (EOP)
915                  */
916                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
917                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
918                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
919
920                 /* Set RS bit only on threshold packets' last descriptor */
921                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
922                         PMD_TX_FREE_LOG(DEBUG,
923                                         "Setting RS bit on TXD id="
924                                         "%4u (port=%d queue=%d)",
925                                         tx_last, txq->port_id, txq->queue_id);
926
927                         cmd_type_len |= IXGBE_TXD_CMD_RS;
928
929                         /* Update txq RS bit counters */
930                         txq->nb_tx_used = 0;
931                         txp = NULL;
932                 } else
933                         txp = txd;
934
935                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
936         }
937
938 end_of_tx:
939         /* set RS on last packet in the burst */
940         if (txp != NULL)
941                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
942
943         rte_wmb();
944
945         /*
946          * Set the Transmit Descriptor Tail (TDT)
947          */
948         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
949                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
950                    (unsigned) tx_id, (unsigned) nb_tx);
951         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
952         txq->tx_tail = tx_id;
953
954         return nb_tx;
955 }
956
957 /*********************************************************************
958  *
959  *  TX prep functions
960  *
961  **********************************************************************/
962 uint16_t
963 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
964 {
965         int i, ret;
966         uint64_t ol_flags;
967         struct rte_mbuf *m;
968         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
969
970         for (i = 0; i < nb_pkts; i++) {
971                 m = tx_pkts[i];
972                 ol_flags = m->ol_flags;
973
974                 /**
975                  * Check if the packet meets the limit on the number of segments
976                  *
977                  * NOTE: for ixgbe the limit is always (40 - WTHRESH), for both
978                  *       TSO and non-TSO
979                  */
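                /*
                 * For illustration: assuming the default WTHRESH of 0 and
                 * IXGBE_TX_MAX_SEG of 40, an mbuf chain of 41 segments is
                 * rejected here with rte_errno = EINVAL.
                 */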
980
981                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
982                         rte_errno = EINVAL;
983                         return i;
984                 }
985
986                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
987                         rte_errno = ENOTSUP;
988                         return i;
989                 }
990
991 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
992                 ret = rte_validate_tx_offload(m);
993                 if (ret != 0) {
994                         rte_errno = -ret;
995                         return i;
996                 }
997 #endif
998                 ret = rte_net_intel_cksum_prepare(m);
999                 if (ret != 0) {
1000                         rte_errno = -ret;
1001                         return i;
1002                 }
1003         }
1004
1005         return i;
1006 }
1007
1008 /*********************************************************************
1009  *
1010  *  RX functions
1011  *
1012  **********************************************************************/
1013
1014 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1015 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1016 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1017 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1018 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1019 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1020 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1021 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1022 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1023 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1024 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1025 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1026 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1027 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1028 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1029 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1030 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1033 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1034 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1038 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1042 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1046 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1047
1048 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1071
1072 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1095
1096 /**
1097  * Use two different tables, one for normal packets and one for
1098  * tunnel packets, to save space.
1099  */
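/*
 * For illustration: the packet type reported by the hardware is used as the
 * index into these tables, e.g. ptype_table[IXGBE_PACKET_TYPE_IPV4_TCP]
 * (0x11) resolves to RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 |
 * RTE_PTYPE_L4_TCP.
 */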
1100 const uint32_t
1101         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1102         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1103         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1104                 RTE_PTYPE_L3_IPV4,
1105         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1106                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1107         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1108                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1109         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1111         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4_EXT,
1113         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1115         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1117         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1119         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV6,
1121         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1123         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1125         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1127         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6_EXT,
1129         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1131         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1133         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1135         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                 RTE_PTYPE_INNER_L3_IPV6,
1138         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1141         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1142                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1143         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1144         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1146                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1147         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1148                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                 RTE_PTYPE_INNER_L3_IPV6,
1150         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1152                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1153         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1154                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1155                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1156         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1158                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1159         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1160                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1161                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1162         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1164                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1165         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1166                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1167                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1168         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1169                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1170                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1171         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1172                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1173                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1174         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1175                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1176                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1177         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1178                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1179                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1180         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1181                 RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1184 };
1185
1186 const uint32_t
1187         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1188         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1190                 RTE_PTYPE_INNER_L2_ETHER,
1191         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1192                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1193                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1194         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1195                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1196                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1197         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1198                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1199                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1200         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1201                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1203         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1204                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1205                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1206         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1207                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1209         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1210                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1211                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1212                 RTE_PTYPE_INNER_L4_TCP,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1216                 RTE_PTYPE_INNER_L4_TCP,
1217         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1218                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1223                 RTE_PTYPE_INNER_L4_TCP,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1225                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1226                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1227                 RTE_PTYPE_INNER_L3_IPV4,
1228         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1229                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1230                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1231                 RTE_PTYPE_INNER_L4_UDP,
1232         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1233                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1234                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1235                 RTE_PTYPE_INNER_L4_UDP,
1236         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1237                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1238                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1239                 RTE_PTYPE_INNER_L4_SCTP,
1240         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1241                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1246                 RTE_PTYPE_INNER_L4_UDP,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1250                 RTE_PTYPE_INNER_L4_SCTP,
1251         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1252                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1253                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1254                 RTE_PTYPE_INNER_L3_IPV4,
1255         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1256                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1257                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1258                 RTE_PTYPE_INNER_L4_SCTP,
1259         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1260                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1262                 RTE_PTYPE_INNER_L4_SCTP,
1263         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1266                 RTE_PTYPE_INNER_L4_TCP,
1267         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1268                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1270                 RTE_PTYPE_INNER_L4_UDP,
1271
1272         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1273                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1274                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1275         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1276                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1277                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1278                 RTE_PTYPE_INNER_L3_IPV4,
1279         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1282                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1283         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1284                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                 RTE_PTYPE_INNER_L3_IPV6,
1287         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1288                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                 RTE_PTYPE_INNER_L3_IPV4,
1291         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1292                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1295         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1296                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                 RTE_PTYPE_INNER_L3_IPV4,
1299         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1300                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1303         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1304                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1307         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1308                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1309                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1310                 RTE_PTYPE_INNER_L3_IPV4,
1311         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1312                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1315         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1316                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1317                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1318                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1319         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1320                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1321                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1322                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1323         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1324                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1327         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1328                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1329                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1330                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1331         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1332                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1333                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1334                 RTE_PTYPE_INNER_L3_IPV4,
1335         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1336                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1337                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1338                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1339         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1340                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1341                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1342                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1343         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1344                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1345                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1346                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1347         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1348                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1349                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1350                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1351         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1352                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1353                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1354                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1355         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1356                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1357                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1358                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1359         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1360                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1361                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1362                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1363 };
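
/*
 * A minimal application-side sketch (not part of this driver) of how the
 * ptype values built from these tables are typically consumed once they land
 * in mbuf->packet_type (macros from rte_mbuf_ptype.h):
 *
 *        uint32_t ptype = mb->packet_type;
 *
 *        if (RTE_ETH_IS_TUNNEL_PKT(ptype))
 *                handle_tunnelled(mb);
 *        else if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP)
 *                handle_plain_tcp(mb);
 *
 * handle_tunnelled() and handle_plain_tcp() are hypothetical application
 * helpers; the inner layers of tunnelled packets are described by the
 * RTE_PTYPE_INNER_* bits set in the tables above.
 */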
1364
1365 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1366 static inline uint32_t
1367 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1368 {
1369
1370         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1371                 return RTE_PTYPE_UNKNOWN;
1372
1373         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1374
1375         /* For tunnel packet */
1376         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1377                 /* Remove the tunnel bit to save table space. */
1378                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1379                 return ptype_table_tn[pkt_info];
1380         }
1381
1382         /**
1383          * For X550, if the packet is not tunnelled, the tunnel
1384          * type bits are set to 0, so the 82599 mask can be
1385          * reused.
1386          */
1387         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1388
1389         return ptype_table[pkt_info];
1390 }
1391
1392 static inline uint64_t
1393 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1394 {
1395         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1396                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1397                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1398                 PKT_RX_RSS_HASH, 0, 0, 0,
1399                 0, 0, 0,  PKT_RX_FDIR,
1400         };
1401 #ifdef RTE_LIBRTE_IEEE1588
1402         static uint64_t ip_pkt_etqf_map[8] = {
1403                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1404                 0, 0, 0, 0,
1405         };
1406
1407         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1408                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1409                                 ip_rss_types_map[pkt_info & 0XF];
1410         else
1411                 return ip_rss_types_map[pkt_info & 0XF];
1412 #else
1413         return ip_rss_types_map[pkt_info & 0XF];
1414 #endif
1415 }
1416
1417 static inline uint64_t
1418 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1419 {
1420         uint64_t pkt_flags;
1421
1422         /*
1423          * Check only whether a VLAN tag is present.
1424          * Do not check whether the L3/L4 Rx checksum was computed by the
1425          * NIC; that can be determined from the rte_eth_rxmode.offloads flags.
1426          */
1427         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1428
1429 #ifdef RTE_LIBRTE_IEEE1588
1430         if (rx_status & IXGBE_RXD_STAT_TMST)
1431                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1432 #endif
1433         return pkt_flags;
1434 }
1435
1436 static inline uint64_t
1437 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1438 {
1439         uint64_t pkt_flags;
1440
1441         /*
1442          * Bit 31: IPE, IPv4 checksum error
1443          * Bit 30: L4I, L4 integrity error
1444          */
1445         static uint64_t error_to_pkt_flags_map[4] = {
1446                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1447                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1448                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1449                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1450         };
1451         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1452                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1453
1454         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1455             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1456                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1457         }
1458
1459 #ifdef RTE_LIBRTE_SECURITY
1460         if (rx_status & IXGBE_RXD_STAT_SECP) {
1461                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1462                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1463                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1464         }
1465 #endif
1466
1467         return pkt_flags;
1468 }
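
/*
 * Illustrative application-side sketch (not part of this driver): the flags
 * built above end up in mbuf->ol_flags and are typically tested with the
 * masks from rte_mbuf.h:
 *
 *        if ((m->ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD)
 *                drop_packet(m);
 *        if ((m->ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD)
 *                drop_packet(m);
 *
 * drop_packet() is a hypothetical application helper.
 */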
1469
1470 /*
1471  * LOOK_AHEAD defines how many desc statuses to check beyond the
1472  * current descriptor.
1473  * It must be a compile-time #define for optimal performance.
1474  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1475  * function only works with LOOK_AHEAD=8.
1476  */
1477 #define LOOK_AHEAD 8
1478 #if (LOOK_AHEAD != 8)
1479 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1480 #endif
1481 static inline int
1482 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1483 {
1484         volatile union ixgbe_adv_rx_desc *rxdp;
1485         struct ixgbe_rx_entry *rxep;
1486         struct rte_mbuf *mb;
1487         uint16_t pkt_len;
1488         uint64_t pkt_flags;
1489         int nb_dd;
1490         uint32_t s[LOOK_AHEAD];
1491         uint32_t pkt_info[LOOK_AHEAD];
1492         int i, j, nb_rx = 0;
1493         uint32_t status;
1494         uint64_t vlan_flags = rxq->vlan_flags;
1495
1496         /* get references to current descriptor and S/W ring entry */
1497         rxdp = &rxq->rx_ring[rxq->rx_tail];
1498         rxep = &rxq->sw_ring[rxq->rx_tail];
1499
1500         status = rxdp->wb.upper.status_error;
1501         /* check to make sure there is at least 1 packet to receive */
1502         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1503                 return 0;
1504
1505         /*
1506          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1507          * reference packets that are ready to be received.
1508          */
1509         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1510              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1511                 /* Read desc statuses; the rte_smp_rmb() below orders them before use */
1512                 for (j = 0; j < LOOK_AHEAD; j++)
1513                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1514
1515                 rte_smp_rmb();
1516
1517                 /* Compute how many status bits were set */
1518                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1519                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1520                         ;
1521
1522                 for (j = 0; j < nb_dd; j++)
1523                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1524                                                        lo_dword.data);
1525
1526                 nb_rx += nb_dd;
1527
1528                 /* Translate descriptor info to mbuf format */
1529                 for (j = 0; j < nb_dd; ++j) {
1530                         mb = rxep[j].mbuf;
1531                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1532                                   rxq->crc_len;
1533                         mb->data_len = pkt_len;
1534                         mb->pkt_len = pkt_len;
1535                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1536
1537                         /* convert descriptor fields to rte mbuf flags */
1538                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1539                                 vlan_flags);
1540                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1541                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1542                                         ((uint16_t)pkt_info[j]);
1543                         mb->ol_flags = pkt_flags;
1544                         mb->packet_type =
1545                                 ixgbe_rxd_pkt_info_to_pkt_type
1546                                         (pkt_info[j], rxq->pkt_type_mask);
1547
1548                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1549                                 mb->hash.rss = rte_le_to_cpu_32(
1550                                     rxdp[j].wb.lower.hi_dword.rss);
1551                         else if (pkt_flags & PKT_RX_FDIR) {
1552                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1553                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1554                                     IXGBE_ATR_HASH_MASK;
1555                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1556                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1557                         }
1558                 }
1559
1560                 /* Move mbuf pointers from the S/W ring to the stage */
1561                 for (j = 0; j < LOOK_AHEAD; ++j) {
1562                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1563                 }
1564
1565                 /* stop if a full LOOK_AHEAD batch was not completed */
1566                 if (nb_dd != LOOK_AHEAD)
1567                         break;
1568         }
1569
1570         /* clear software ring entries so we can cleanup correctly */
1571         for (i = 0; i < nb_rx; ++i) {
1572                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1573         }
1574
1575
1576         return nb_rx;
1577 }
1578
1579 static inline int
1580 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1581 {
1582         volatile union ixgbe_adv_rx_desc *rxdp;
1583         struct ixgbe_rx_entry *rxep;
1584         struct rte_mbuf *mb;
1585         uint16_t alloc_idx;
1586         __le64 dma_addr;
1587         int diag, i;
1588
1589         /* allocate buffers in bulk directly into the S/W ring */
1590         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1591         rxep = &rxq->sw_ring[alloc_idx];
1592         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1593                                     rxq->rx_free_thresh);
1594         if (unlikely(diag != 0))
1595                 return -ENOMEM;
1596
1597         rxdp = &rxq->rx_ring[alloc_idx];
1598         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1599                 /* populate the static rte mbuf fields */
1600                 mb = rxep[i].mbuf;
1601                 if (reset_mbuf) {
1602                         mb->port = rxq->port_id;
1603                 }
1604
1605                 rte_mbuf_refcnt_set(mb, 1);
1606                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1607
1608                 /* populate the descriptors */
1609                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1610                 rxdp[i].read.hdr_addr = 0;
1611                 rxdp[i].read.pkt_addr = dma_addr;
1612         }
1613
1614         /* update state of internal queue structure */
1615         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1616         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1617                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1618
1619         /* no errors */
1620         return 0;
1621 }
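
/*
 * Worked example of the trigger arithmetic above (a sketch; assumes
 * nb_rx_desc = 128, rx_free_thresh = 32 and the usual reset value of
 * rx_free_trigger = rx_free_thresh - 1 = 31): successive calls see
 * rx_free_trigger = 31, 63, 95, 127, so alloc_idx walks 0, 32, 64, 96; after
 * the refill at 127 the trigger would reach 159 >= nb_rx_desc and is wrapped
 * back to 31, so the next refill starts again at index 0 of the S/W ring.
 */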
1622
1623 static inline uint16_t
1624 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1625                          uint16_t nb_pkts)
1626 {
1627         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1628         int i;
1629
1630         /* how many packets are ready to return? */
1631         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1632
1633         /* copy mbuf pointers to the application's packet list */
1634         for (i = 0; i < nb_pkts; ++i)
1635                 rx_pkts[i] = stage[i];
1636
1637         /* update internal queue state */
1638         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1639         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1640
1641         return nb_pkts;
1642 }
1643
1644 static inline uint16_t
1645 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1646              uint16_t nb_pkts)
1647 {
1648         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1649         uint16_t nb_rx = 0;
1650
1651         /* Any previously recv'd pkts will be returned from the Rx stage */
1652         if (rxq->rx_nb_avail)
1653                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1654
1655         /* Scan the H/W ring for packets to receive */
1656         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1657
1658         /* update internal queue state */
1659         rxq->rx_next_avail = 0;
1660         rxq->rx_nb_avail = nb_rx;
1661         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1662
1663         /* if required, allocate new buffers to replenish descriptors */
1664         if (rxq->rx_tail > rxq->rx_free_trigger) {
1665                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1666
1667                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1668                         int i, j;
1669
1670                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1671                                    "queue_id=%u", (unsigned) rxq->port_id,
1672                                    (unsigned) rxq->queue_id);
1673
1674                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1675                                 rxq->rx_free_thresh;
1676
1677                         /*
1678                          * Need to rewind any previous receives if we cannot
1679                          * allocate new buffers to replenish the old ones.
1680                          */
1681                         rxq->rx_nb_avail = 0;
1682                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1683                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1684                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1685
1686                         return 0;
1687                 }
1688
1689                 /* update tail pointer */
1690                 rte_wmb();
1691                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1692                                             cur_free_trigger);
1693         }
1694
1695         if (rxq->rx_tail >= rxq->nb_rx_desc)
1696                 rxq->rx_tail = 0;
1697
1698         /* received any packets this loop? */
1699         if (rxq->rx_nb_avail)
1700                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1701
1702         return 0;
1703 }
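
/*
 * Worked example of the staging above (a sketch; assumes LOOK_AHEAD = 8 and
 * 10 completed descriptors on the ring): ixgbe_rx_scan_hw_ring() stages all
 * 10 mbufs (one full batch of 8, then a partial batch of 2 that stops the
 * scan). A call with nb_pkts = 4 then returns 4 of them and leaves
 * rx_nb_avail = 6, which the next call returns straight from the stage
 * without touching the H/W ring.
 */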
1704
1705 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1706 uint16_t
1707 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1708                            uint16_t nb_pkts)
1709 {
1710         uint16_t nb_rx;
1711
1712         if (unlikely(nb_pkts == 0))
1713                 return 0;
1714
1715         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1716                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1717
1718         /* request is relatively large, chunk it up */
1719         nb_rx = 0;
1720         while (nb_pkts) {
1721                 uint16_t ret, n;
1722
1723                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1724                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1725                 nb_rx = (uint16_t)(nb_rx + ret);
1726                 nb_pkts = (uint16_t)(nb_pkts - ret);
1727                 if (ret < n)
1728                         break;
1729         }
1730
1731         return nb_rx;
1732 }
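
/*
 * Usage sketch (application side, not part of this driver): this handler is
 * reached through the normal burst API, and requests larger than
 * RTE_PMD_IXGBE_RX_MAX_BURST are simply chunked as shown above:
 *
 *        struct rte_mbuf *pkts[64];
 *        uint16_t i, nb;
 *
 *        nb = rte_eth_rx_burst(port_id, queue_id, pkts, 64);
 *        for (i = 0; i < nb; i++)
 *                process_packet(pkts[i]);
 *
 * process_packet() is a hypothetical application routine.
 */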
1733
1734 uint16_t
1735 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1736                 uint16_t nb_pkts)
1737 {
1738         struct ixgbe_rx_queue *rxq;
1739         volatile union ixgbe_adv_rx_desc *rx_ring;
1740         volatile union ixgbe_adv_rx_desc *rxdp;
1741         struct ixgbe_rx_entry *sw_ring;
1742         struct ixgbe_rx_entry *rxe;
1743         struct rte_mbuf *rxm;
1744         struct rte_mbuf *nmb;
1745         union ixgbe_adv_rx_desc rxd;
1746         uint64_t dma_addr;
1747         uint32_t staterr;
1748         uint32_t pkt_info;
1749         uint16_t pkt_len;
1750         uint16_t rx_id;
1751         uint16_t nb_rx;
1752         uint16_t nb_hold;
1753         uint64_t pkt_flags;
1754         uint64_t vlan_flags;
1755
1756         nb_rx = 0;
1757         nb_hold = 0;
1758         rxq = rx_queue;
1759         rx_id = rxq->rx_tail;
1760         rx_ring = rxq->rx_ring;
1761         sw_ring = rxq->sw_ring;
1762         vlan_flags = rxq->vlan_flags;
1763         while (nb_rx < nb_pkts) {
1764                 /*
1765                  * The order of operations here is important as the DD status
1766                  * bit must not be read after any other descriptor fields.
1767                  * rx_ring and rxdp are pointing to volatile data so the order
1768                  * of accesses cannot be reordered by the compiler. If they were
1769                  * not volatile, they could be reordered which could lead to
1770                  * using invalid descriptor fields when read from rxd.
1771                  */
1772                 rxdp = &rx_ring[rx_id];
1773                 staterr = rxdp->wb.upper.status_error;
1774                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1775                         break;
1776                 rxd = *rxdp;
1777
1778                 /*
1779                  * End of packet.
1780                  *
1781                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1782                  * is likely to be invalid and to be dropped by the various
1783                  * validation checks performed by the network stack.
1784                  *
1785                  * Allocate a new mbuf to replenish the RX ring descriptor.
1786                  * If the allocation fails:
1787                  *    - arrange for that RX descriptor to be the first one
1788                  *      being parsed the next time the receive function is
1789                  *      invoked [on the same queue].
1790                  *
1791                  *    - Stop parsing the RX ring and return immediately.
1792                  *
1793                  * This policy does not drop the packet received in the RX
1794                  * descriptor for which the allocation of a new mbuf failed.
1795                  * Thus, it allows that packet to be later retrieved if
1796                  * mbufs have been freed in the meantime.
1797                  * As a side effect, holding RX descriptors instead of
1798                  * systematically giving them back to the NIC may lead to
1799                  * RX ring exhaustion situations.
1800                  * However, the NIC can gracefully prevent such situations
1801                  * from happening by sending specific "back-pressure" flow control
1802                  * frames to its peer(s).
1803                  */
1804                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1805                            "ext_err_stat=0x%08x pkt_len=%u",
1806                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1807                            (unsigned) rx_id, (unsigned) staterr,
1808                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1809
1810                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1811                 if (nmb == NULL) {
1812                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1813                                    "queue_id=%u", (unsigned) rxq->port_id,
1814                                    (unsigned) rxq->queue_id);
1815                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1816                         break;
1817                 }
1818
1819                 nb_hold++;
1820                 rxe = &sw_ring[rx_id];
1821                 rx_id++;
1822                 if (rx_id == rxq->nb_rx_desc)
1823                         rx_id = 0;
1824
1825                 /* Prefetch next mbuf while processing current one. */
1826                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1827
1828                 /*
1829                  * When next RX descriptor is on a cache-line boundary,
1830                  * prefetch the next 4 RX descriptors and the next 8 pointers
1831                  * to mbufs.
1832                  */
1833                 if ((rx_id & 0x3) == 0) {
1834                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1835                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1836                 }
1837
1838                 rxm = rxe->mbuf;
1839                 rxe->mbuf = nmb;
1840                 dma_addr =
1841                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1842                 rxdp->read.hdr_addr = 0;
1843                 rxdp->read.pkt_addr = dma_addr;
1844
1845                 /*
1846                  * Initialize the returned mbuf.
1847                  * 1) setup generic mbuf fields:
1848                  *    - number of segments,
1849                  *    - next segment,
1850                  *    - packet length,
1851                  *    - RX port identifier.
1852                  * 2) integrate hardware offload data, if any:
1853                  *    - RSS flag & hash,
1854                  *    - IP checksum flag,
1855                  *    - VLAN TCI, if any,
1856                  *    - error flags.
1857                  */
1858                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1859                                       rxq->crc_len);
1860                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1861                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1862                 rxm->nb_segs = 1;
1863                 rxm->next = NULL;
1864                 rxm->pkt_len = pkt_len;
1865                 rxm->data_len = pkt_len;
1866                 rxm->port = rxq->port_id;
1867
1868                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1869                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1870                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1871
1872                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1873                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1874                 pkt_flags = pkt_flags |
1875                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1876                 rxm->ol_flags = pkt_flags;
1877                 rxm->packet_type =
1878                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1879                                                        rxq->pkt_type_mask);
1880
1881                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1882                         rxm->hash.rss = rte_le_to_cpu_32(
1883                                                 rxd.wb.lower.hi_dword.rss);
1884                 else if (pkt_flags & PKT_RX_FDIR) {
1885                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1886                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1887                                         IXGBE_ATR_HASH_MASK;
1888                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1889                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1890                 }
1891                 /*
1892                  * Store the mbuf address into the next entry of the array
1893                  * of returned packets.
1894                  */
1895                 rx_pkts[nb_rx++] = rxm;
1896         }
1897         rxq->rx_tail = rx_id;
1898
1899         /*
1900          * If the number of free RX descriptors is greater than the RX free
1901          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1902          * register.
1903          * Update the RDT with the value of the last processed RX descriptor
1904          * minus 1, to guarantee that the RDT register is never equal to the
1905          * RDH register, which creates a "full" ring situation from the
1906          * hardware point of view...
1907          */
1908         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1909         if (nb_hold > rxq->rx_free_thresh) {
1910                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1911                            "nb_hold=%u nb_rx=%u",
1912                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1913                            (unsigned) rx_id, (unsigned) nb_hold,
1914                            (unsigned) nb_rx);
1915                 rx_id = (uint16_t) ((rx_id == 0) ?
1916                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1917                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1918                 nb_hold = 0;
1919         }
1920         rxq->nb_rx_hold = nb_hold;
1921         return nb_rx;
1922 }
1923
1924 /**
1925  * Detect an RSC descriptor.
1926  */
1927 static inline uint32_t
1928 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1929 {
1930         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1931                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1932 }
1933
1934 /**
1935  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1936  *
1937  * Fill the following info in the HEAD buffer of the Rx cluster:
1938  *    - RX port identifier
1939  *    - hardware offload data, if any:
1940  *      - RSS flag & hash
1941  *      - IP checksum flag
1942  *      - VLAN TCI, if any
1943  *      - error flags
1944  * @head HEAD of the packet cluster
1945  * @desc HW descriptor to get data from
1946  * @rxq Pointer to the Rx queue
1947  */
1948 static inline void
1949 ixgbe_fill_cluster_head_buf(
1950         struct rte_mbuf *head,
1951         union ixgbe_adv_rx_desc *desc,
1952         struct ixgbe_rx_queue *rxq,
1953         uint32_t staterr)
1954 {
1955         uint32_t pkt_info;
1956         uint64_t pkt_flags;
1957
1958         head->port = rxq->port_id;
1959
1960         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1961          * set in the pkt_flags field.
1962          */
1963         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1964         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1965         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1966         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1967         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1968         head->ol_flags = pkt_flags;
1969         head->packet_type =
1970                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1971
1972         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1973                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1974         else if (pkt_flags & PKT_RX_FDIR) {
1975                 head->hash.fdir.hash =
1976                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1977                                                           & IXGBE_ATR_HASH_MASK;
1978                 head->hash.fdir.id =
1979                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1980         }
1981 }
1982
1983 /**
1984  * ixgbe_recv_pkts_lro - receive handler for the scattered and LRO cases.
1985  *
1986  * @rx_queue Rx queue handle
1987  * @rx_pkts table of received packets
1988  * @nb_pkts size of rx_pkts table
1989  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1990  *
1991  * Handles the Rx HW ring completions when the RSC feature is configured.
1992  * Uses an additional ring of ixgbe_scattered_rx_entry's holding the RSC info.
1993  *
1994  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1995  * 1) When non-EOP RSC completion arrives:
1996  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1997  *       segment's data length.
1998  *    b) Set the "next" pointer of the current segment to point to the segment
1999  *       at the NEXTP index.
2000  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2001  *       in the sw_sc_ring.
2002  * 2) When EOP arrives we just update the cluster's total length and offload
2003  *    flags and deliver the cluster up to the upper layers. In our case - put it
2004  *    in the rx_pkts table.
2005  *
2006  * Returns the number of received packets/clusters (according to the "bulk
2007  * receive" interface).
2008  */
2009 static inline uint16_t
2010 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2011                     bool bulk_alloc)
2012 {
2013         struct ixgbe_rx_queue *rxq = rx_queue;
2014         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2015         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2016         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2017         uint16_t rx_id = rxq->rx_tail;
2018         uint16_t nb_rx = 0;
2019         uint16_t nb_hold = rxq->nb_rx_hold;
2020         uint16_t prev_id = rxq->rx_tail;
2021
2022         while (nb_rx < nb_pkts) {
2023                 bool eop;
2024                 struct ixgbe_rx_entry *rxe;
2025                 struct ixgbe_scattered_rx_entry *sc_entry;
2026                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2027                 struct ixgbe_rx_entry *next_rxe = NULL;
2028                 struct rte_mbuf *first_seg;
2029                 struct rte_mbuf *rxm;
2030                 struct rte_mbuf *nmb = NULL;
2031                 union ixgbe_adv_rx_desc rxd;
2032                 uint16_t data_len;
2033                 uint16_t next_id;
2034                 volatile union ixgbe_adv_rx_desc *rxdp;
2035                 uint32_t staterr;
2036
2037 next_desc:
2038                 /*
2039                  * The code in this whole file uses the volatile pointer to
2040                  * ensure the read ordering of the status and the rest of the
2041                  * descriptor fields (on the compiler level only!!!). This is so
2042          * UGLY - why not just use the compiler barrier instead? DPDK
2043                  * even has the rte_compiler_barrier() for that.
2044                  *
2045                  * But most importantly this is just wrong because this doesn't
2046                  * ensure memory ordering in a general case at all. For
2047                  * instance, DPDK is supposed to work on Power CPUs where
2048                  * compiler barrier may just not be enough!
2049                  *
2050                  * I tried to write only this function properly to have a
2051                  * starting point (as a part of an LRO/RSC series) but the
2052                  * compiler cursed at me when I tried to cast away the
2053                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2054                  * keeping it the way it is for now.
2055                  *
2056                  * The code in this file is broken in so many other places and
2057                  * will just not work on a big endian CPU anyway therefore the
2058                  * lines below will have to be revisited together with the rest
2059                  * of the ixgbe PMD.
2060                  *
2061                  * TODO:
2062                  *    - Get rid of "volatile" and let the compiler do its job.
2063                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2064                  *      memory ordering below.
2065                  */
2066                 rxdp = &rx_ring[rx_id];
2067                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2068
2069                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2070                         break;
2071
2072                 rxd = *rxdp;
2073
2074                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2075                                   "staterr=0x%x data_len=%u",
2076                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2077                            rte_le_to_cpu_16(rxd.wb.upper.length));
2078
2079                 if (!bulk_alloc) {
2080                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2081                         if (nmb == NULL) {
2082                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2083                                                   "port_id=%u queue_id=%u",
2084                                            rxq->port_id, rxq->queue_id);
2085
2086                                 rte_eth_devices[rxq->port_id].data->
2087                                                         rx_mbuf_alloc_failed++;
2088                                 break;
2089                         }
2090                 } else if (nb_hold > rxq->rx_free_thresh) {
2091                         uint16_t next_rdt = rxq->rx_free_trigger;
2092
2093                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2094                                 rte_wmb();
2095                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2096                                                             next_rdt);
2097                                 nb_hold -= rxq->rx_free_thresh;
2098                         } else {
2099                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2100                                                   "port_id=%u queue_id=%u",
2101                                            rxq->port_id, rxq->queue_id);
2102
2103                                 rte_eth_devices[rxq->port_id].data->
2104                                                         rx_mbuf_alloc_failed++;
2105                                 break;
2106                         }
2107                 }
2108
2109                 nb_hold++;
2110                 rxe = &sw_ring[rx_id];
2111                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2112
2113                 next_id = rx_id + 1;
2114                 if (next_id == rxq->nb_rx_desc)
2115                         next_id = 0;
2116
2117                 /* Prefetch next mbuf while processing current one. */
2118                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2119
2120                 /*
2121                  * When next RX descriptor is on a cache-line boundary,
2122                  * prefetch the next 4 RX descriptors and the next 8 pointers
2123                  * to mbufs.
2124                  */
2125                 if ((next_id & 0x3) == 0) {
2126                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2127                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2128                 }
2129
2130                 rxm = rxe->mbuf;
2131
2132                 if (!bulk_alloc) {
2133                         __le64 dma =
2134                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2135                         /*
2136                          * Update RX descriptor with the physical address of the
2137                          * new data buffer of the new allocated mbuf.
2138                          */
2139                         rxe->mbuf = nmb;
2140
2141                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2142                         rxdp->read.hdr_addr = 0;
2143                         rxdp->read.pkt_addr = dma;
2144                 } else
2145                         rxe->mbuf = NULL;
2146
2147                 /*
2148                  * Set data length & data buffer address of mbuf.
2149                  */
2150                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2151                 rxm->data_len = data_len;
2152
2153                 if (!eop) {
2154                         uint16_t nextp_id;
2155                         /*
2156                          * Get next descriptor index:
2157                          *  - For RSC it's in the NEXTP field.
2158                          *  - For a scattered packet - it's just a following
2159                          *    descriptor.
2160                          */
2161                         if (ixgbe_rsc_count(&rxd))
2162                                 nextp_id =
2163                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2164                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2165                         else
2166                                 nextp_id = next_id;
2167
2168                         next_sc_entry = &sw_sc_ring[nextp_id];
2169                         next_rxe = &sw_ring[nextp_id];
2170                         rte_ixgbe_prefetch(next_rxe);
2171                 }
2172
2173                 sc_entry = &sw_sc_ring[rx_id];
2174                 first_seg = sc_entry->fbuf;
2175                 sc_entry->fbuf = NULL;
2176
2177                 /*
2178                  * If this is the first buffer of the received packet,
2179                  * set the pointer to the first mbuf of the packet and
2180                  * initialize its context.
2181                  * Otherwise, update the total length and the number of segments
2182                  * of the current scattered packet, and update the pointer to
2183                  * the last mbuf of the current packet.
2184                  */
2185                 if (first_seg == NULL) {
2186                         first_seg = rxm;
2187                         first_seg->pkt_len = data_len;
2188                         first_seg->nb_segs = 1;
2189                 } else {
2190                         first_seg->pkt_len += data_len;
2191                         first_seg->nb_segs++;
2192                 }
2193
2194                 prev_id = rx_id;
2195                 rx_id = next_id;
2196
2197                 /*
2198                  * If this is not the last buffer of the received packet, update
2199                  * the pointer to the first mbuf at the NEXTP entry in the
2200                  * sw_sc_ring and continue to parse the RX ring.
2201                  */
2202                 if (!eop && next_rxe) {
2203                         rxm->next = next_rxe->mbuf;
2204                         next_sc_entry->fbuf = first_seg;
2205                         goto next_desc;
2206                 }
2207
2208                 /* Initialize the first mbuf of the returned packet */
2209                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2210
2211                 /*
2212                  * Deal with the case when HW CRC stripping is disabled.
2213                  * That can't happen when LRO is enabled, but still could
2214                  * happen for scattered RX mode.
2215                  */
2216                 first_seg->pkt_len -= rxq->crc_len;
2217                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2218                         struct rte_mbuf *lp;
2219
2220                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2221                                 ;
2222
2223                         first_seg->nb_segs--;
2224                         lp->data_len -= rxq->crc_len - rxm->data_len;
2225                         lp->next = NULL;
2226                         rte_pktmbuf_free_seg(rxm);
2227                 } else
2228                         rxm->data_len -= rxq->crc_len;
2229
2230                 /* Prefetch data of first segment, if configured to do so. */
2231                 rte_packet_prefetch((char *)first_seg->buf_addr +
2232                         first_seg->data_off);
2233
2234                 /*
2235                  * Store the mbuf address into the next entry of the array
2236                  * of returned packets.
2237                  */
2238                 rx_pkts[nb_rx++] = first_seg;
2239         }
2240
2241         /*
2242          * Record index of the next RX descriptor to probe.
2243          */
2244         rxq->rx_tail = rx_id;
2245
2246         /*
2247          * If the number of free RX descriptors is greater than the RX free
2248          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2249          * register.
2250          * Update the RDT with the value of the last processed RX descriptor
2251          * minus 1, to guarantee that the RDT register is never equal to the
2252          * RDH register, which creates a "full" ring situation from the
2253          * hardware point of view...
2254          */
2255         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2256                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2257                            "nb_hold=%u nb_rx=%u",
2258                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2259
2260                 rte_wmb();
2261                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2262                 nb_hold = 0;
2263         }
2264
2265         rxq->nb_rx_hold = nb_hold;
2266         return nb_rx;
2267 }
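
/*
 * Sketch (application side, not part of this driver) of the cluster layout
 * produced above: the HEAD mbuf returned in rx_pkts[] carries pkt_len,
 * nb_segs and the offload data filled in by ixgbe_fill_cluster_head_buf(),
 * and the segments are chained through mbuf->next:
 *
 *        struct rte_mbuf *seg;
 *        uint32_t total = 0;
 *
 *        for (seg = head; seg != NULL; seg = seg->next)
 *                total += seg->data_len;
 *
 * For a well-formed cluster, total equals head->pkt_len (head being the
 * returned HEAD mbuf).
 */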
2268
2269 uint16_t
2270 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2271                                  uint16_t nb_pkts)
2272 {
2273         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2274 }
2275
2276 uint16_t
2277 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2278                                uint16_t nb_pkts)
2279 {
2280         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2281 }
2282
2283 /*********************************************************************
2284  *
2285  *  Queue management functions
2286  *
2287  **********************************************************************/
2288
2289 static void __attribute__((cold))
2290 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2291 {
2292         unsigned i;
2293
2294         if (txq->sw_ring != NULL) {
2295                 for (i = 0; i < txq->nb_tx_desc; i++) {
2296                         if (txq->sw_ring[i].mbuf != NULL) {
2297                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2298                                 txq->sw_ring[i].mbuf = NULL;
2299                         }
2300                 }
2301         }
2302 }
2303
2304 static int
2305 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2306 {
2307         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2308         uint16_t i, tx_last, tx_id;
2309         uint16_t nb_tx_free_last;
2310         uint16_t nb_tx_to_clean;
2311         uint32_t pkt_cnt;
2312
2313         /* Start free mbuf from the next of tx_tail */
2314         tx_last = txq->tx_tail;
2315         tx_id  = swr_ring[tx_last].next_id;
2316
2317         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2318                 return 0;
2319
2320         nb_tx_to_clean = txq->nb_tx_free;
2321         nb_tx_free_last = txq->nb_tx_free;
2322         if (!free_cnt)
2323                 free_cnt = txq->nb_tx_desc;
2324
2325         /* Loop through swr_ring to count the number of
2326          * freeable mbufs and packets.
2327          */
2328         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2329                 for (i = 0; i < nb_tx_to_clean &&
2330                         pkt_cnt < free_cnt &&
2331                         tx_id != tx_last; i++) {
2332                         if (swr_ring[tx_id].mbuf != NULL) {
2333                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2334                                 swr_ring[tx_id].mbuf = NULL;
2335
2336                                 /*
2337                                  * last segment in the packet,
2338                                  * increment packet count
2339                                  */
2340                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2341                         }
2342
2343                         tx_id = swr_ring[tx_id].next_id;
2344                 }
2345
2346                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2347                         txq->nb_tx_free || tx_id == tx_last)
2348                         break;
2349
2350                 if (pkt_cnt < free_cnt) {
2351                         if (ixgbe_xmit_cleanup(txq))
2352                                 break;
2353
2354                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2355                         nb_tx_free_last = txq->nb_tx_free;
2356                 }
2357         }
2358
2359         return (int)pkt_cnt;
2360 }
2361
2362 static int
2363 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2364                         uint32_t free_cnt)
2365 {
2366         int i, n, cnt;
2367
2368         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2369                 free_cnt = txq->nb_tx_desc;
2370
2371         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2372
2373         for (i = 0; i < cnt; i += n) {
2374                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2375                         break;
2376
2377                 n = ixgbe_tx_free_bufs(txq);
2378
2379                 if (n == 0)
2380                         break;
2381         }
2382
2383         return i;
2384 }
2385
2386 static int
2387 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2388                         uint32_t free_cnt __rte_unused)
2389 {
2390         return -ENOTSUP;
2391 }
2392
2393 int
2394 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2395 {
2396         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2397         if (txq->offloads == 0 &&
2398 #ifdef RTE_LIBRTE_SECURITY
2399                         !(txq->using_ipsec) &&
2400 #endif
2401                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2402                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2403                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2404                                         txq->sw_ring_v != NULL)) {
2405                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2406                 } else {
2407                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2408                 }
2409         }
2410
2411         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2412 }
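
/*
 * Usage sketch (application side, not part of this driver): this entry point
 * is normally reached through the ethdev API, e.g. asking the PMD to free up
 * to 64 already-transmitted mbufs on queue 0:
 *
 *        int n = rte_eth_tx_done_cleanup(port_id, 0, 64);
 *
 *        if (n < 0)
 *                handle_error(n);
 *
 * handle_error() is a hypothetical application helper; -ENOTSUP is returned
 * when the vector Tx path is in use (see ixgbe_tx_done_cleanup_vec() above),
 * and a free_cnt of 0 means "clean as many as possible".
 */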
2413
2414 static void __attribute__((cold))
2415 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2416 {
2417         if (txq != NULL &&
2418             txq->sw_ring != NULL)
2419                 rte_free(txq->sw_ring);
2420 }
2421
2422 static void __attribute__((cold))
2423 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2424 {
2425         if (txq != NULL && txq->ops != NULL) {
2426                 txq->ops->release_mbufs(txq);
2427                 txq->ops->free_swring(txq);
2428                 rte_free(txq);
2429         }
2430 }
2431
2432 void __attribute__((cold))
2433 ixgbe_dev_tx_queue_release(void *txq)
2434 {
2435         ixgbe_tx_queue_release(txq);
2436 }
2437
2438 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2439 static void __attribute__((cold))
2440 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2441 {
2442         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2443         struct ixgbe_tx_entry *txe = txq->sw_ring;
2444         uint16_t prev, i;
2445
2446         /* Zero out HW ring memory */
2447         for (i = 0; i < txq->nb_tx_desc; i++) {
2448                 txq->tx_ring[i] = zeroed_desc;
2449         }
2450
2451         /* Initialize SW ring entries */
2452         prev = (uint16_t) (txq->nb_tx_desc - 1);
2453         for (i = 0; i < txq->nb_tx_desc; i++) {
2454                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2455
2456                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2457                 txe[i].mbuf = NULL;
2458                 txe[i].last_id = i;
2459                 txe[prev].next_id = i;
2460                 prev = i;
2461         }
2462
2463         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2464         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2465
2466         txq->tx_tail = 0;
2467         txq->nb_tx_used = 0;
2468         /*
2469          * Always allow 1 descriptor to be un-allocated to avoid
2470          * a H/W race condition
2471          */
2472         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2473         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2474         txq->ctx_curr = 0;
2475         memset((void *)&txq->ctx_cache, 0,
2476                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2477 }
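
/*
 * Worked example of the S/W ring linkage built above (a sketch; assumes
 * nb_tx_desc = 4): the loop yields txe[3].next_id = 0, txe[0].next_id = 1,
 * txe[1].next_id = 2 and txe[2].next_id = 3, i.e. a circular list with
 * txe[i].next_id == (i + 1) % nb_tx_desc, which is what the Tx cleanup code
 * above walks when freeing transmitted mbufs.
 */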
2478
2479 static const struct ixgbe_txq_ops def_txq_ops = {
2480         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2481         .free_swring = ixgbe_tx_free_swring,
2482         .reset = ixgbe_reset_tx_queue,
2483 };
2484
2485 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2486  * the queue parameters. Used in tx_queue_setup by primary process and then
2487  * in dev_init by secondary process when attaching to an existing ethdev.
2488  */
2489 void __attribute__((cold))
2490 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2491 {
2492         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2493         if ((txq->offloads == 0) &&
2494 #ifdef RTE_LIBRTE_SECURITY
2495                         !(txq->using_ipsec) &&
2496 #endif
2497                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2498                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2499                 dev->tx_pkt_prepare = NULL;
2500 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2501                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2502                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2503                                         ixgbe_txq_vec_setup(txq) == 0)) {
2504                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2505                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2506                 } else
2507 #endif
2508                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2509         } else {
2510                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2511                 PMD_INIT_LOG(DEBUG,
2512                                 " - offloads = 0x%" PRIx64,
2513                                 txq->offloads);
2514                 PMD_INIT_LOG(DEBUG,
2515                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2516                                 (unsigned long)txq->tx_rs_thresh,
2517                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2518                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2519                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2520         }
2521 }
2522
2523 uint64_t
2524 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2525 {
2526         RTE_SET_USED(dev);
2527
2528         return 0;
2529 }
2530
2531 uint64_t
2532 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2533 {
2534         uint64_t tx_offload_capa;
2535         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2536
2537         tx_offload_capa =
2538                 DEV_TX_OFFLOAD_VLAN_INSERT |
2539                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2540                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2541                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2542                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2543                 DEV_TX_OFFLOAD_TCP_TSO     |
2544                 DEV_TX_OFFLOAD_MULTI_SEGS;
2545
2546         if (hw->mac.type == ixgbe_mac_82599EB ||
2547             hw->mac.type == ixgbe_mac_X540)
2548                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2549
2550         if (hw->mac.type == ixgbe_mac_X550 ||
2551             hw->mac.type == ixgbe_mac_X550EM_x ||
2552             hw->mac.type == ixgbe_mac_X550EM_a)
2553                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2554
2555 #ifdef RTE_LIBRTE_SECURITY
2556         if (dev->security_ctx)
2557                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2558 #endif
2559         return tx_offload_capa;
2560 }
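
/*
 * Illustrative sketch (not part of the driver): an application would normally
 * consume the capability flags reported above through rte_eth_dev_info_get()
 * and request only the offloads the port actually supports. The chosen
 * offload set below is an assumption for illustration.
 */
static __rte_unused uint64_t
example_pick_tx_offloads(uint16_t port_id)
{
        struct rte_eth_dev_info dev_info;
        uint64_t wanted = DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_TCP_TSO;

        if (rte_eth_dev_info_get(port_id, &dev_info) != 0)
                return 0;

        /* Keep only the offloads this port reports as supported */
        return wanted & dev_info.tx_offload_capa;
}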
2561
2562 int __attribute__((cold))
2563 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2564                          uint16_t queue_idx,
2565                          uint16_t nb_desc,
2566                          unsigned int socket_id,
2567                          const struct rte_eth_txconf *tx_conf)
2568 {
2569         const struct rte_memzone *tz;
2570         struct ixgbe_tx_queue *txq;
2571         struct ixgbe_hw     *hw;
2572         uint16_t tx_rs_thresh, tx_free_thresh;
2573         uint64_t offloads;
2574
2575         PMD_INIT_FUNC_TRACE();
2576         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2577
2578         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2579
2580         /*
2581          * Validate number of transmit descriptors.
2582          * It must not exceed hardware maximum, and must be multiple
2583          * of IXGBE_ALIGN.
2584          */
2585         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2586                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2587                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2588                 return -EINVAL;
2589         }
2590
2591         /*
2592          * The following two parameters control the setting of the RS bit on
2593          * transmit descriptors.
2594          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2595          * descriptors have been used.
2596          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2597          * descriptors are used or if the number of descriptors required
2598          * to transmit a packet is greater than the number of free TX
2599          * descriptors.
2600          * The following constraints must be satisfied:
2601          *  tx_rs_thresh must be greater than 0.
2602          *  tx_rs_thresh must be less than the size of the ring minus 2.
2603          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2604          *  tx_rs_thresh must be a divisor of the ring size.
2605          *  tx_free_thresh must be greater than 0.
2606          *  tx_free_thresh must be less than the size of the ring minus 3.
2607          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2608          * One descriptor in the TX ring is used as a sentinel to avoid a
2609          * H/W race condition, hence the maximum threshold constraints.
2610          * When set to zero use default values.
2611          */
2612         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2613                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2614         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2615         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2616                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2617         if (tx_conf->tx_rs_thresh > 0)
2618                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2619         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2620                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2621                              "exceed nb_desc. (tx_rs_thresh=%u "
2622                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2623                              (unsigned int)tx_rs_thresh,
2624                              (unsigned int)tx_free_thresh,
2625                              (unsigned int)nb_desc,
2626                              (int)dev->data->port_id,
2627                              (int)queue_idx);
2628                 return -(EINVAL);
2629         }
2630         if (tx_rs_thresh >= (nb_desc - 2)) {
2631                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2632                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2633                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2634                         (int)dev->data->port_id, (int)queue_idx);
2635                 return -(EINVAL);
2636         }
2637         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2638                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2639                         "(tx_rs_thresh=%u port=%d queue=%d)",
2640                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2641                         (int)dev->data->port_id, (int)queue_idx);
2642                 return -(EINVAL);
2643         }
2644         if (tx_free_thresh >= (nb_desc - 3)) {
2645                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2646                              "tx_free_thresh must be less than the number of "
2647                              "TX descriptors minus 3. (tx_free_thresh=%u "
2648                              "port=%d queue=%d)",
2649                              (unsigned int)tx_free_thresh,
2650                              (int)dev->data->port_id, (int)queue_idx);
2651                 return -(EINVAL);
2652         }
2653         if (tx_rs_thresh > tx_free_thresh) {
2654                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2655                              "tx_free_thresh. (tx_free_thresh=%u "
2656                              "tx_rs_thresh=%u port=%d queue=%d)",
2657                              (unsigned int)tx_free_thresh,
2658                              (unsigned int)tx_rs_thresh,
2659                              (int)dev->data->port_id,
2660                              (int)queue_idx);
2661                 return -(EINVAL);
2662         }
2663         if ((nb_desc % tx_rs_thresh) != 0) {
2664                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2665                              "number of TX descriptors. (tx_rs_thresh=%u "
2666                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2667                              (int)dev->data->port_id, (int)queue_idx);
2668                 return -(EINVAL);
2669         }
2670
2671         /*
2672          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2673          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2674          * by the NIC and all descriptors are written back after the NIC
2675          * accumulates WTHRESH descriptors.
2676          */
2677         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2678                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2679                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2680                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2681                              (int)dev->data->port_id, (int)queue_idx);
2682                 return -(EINVAL);
2683         }
2684
2685         /* Free memory prior to re-allocation if needed... */
2686         if (dev->data->tx_queues[queue_idx] != NULL) {
2687                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2688                 dev->data->tx_queues[queue_idx] = NULL;
2689         }
2690
2691         /* First allocate the tx queue data structure */
2692         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2693                                  RTE_CACHE_LINE_SIZE, socket_id);
2694         if (txq == NULL)
2695                 return -ENOMEM;
2696
2697         /*
2698          * Allocate TX ring hardware descriptors. A memzone large enough to
2699          * handle the maximum ring size is allocated in order to allow for
2700          * resizing in later calls to the queue setup function.
2701          */
2702         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2703                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2704                         IXGBE_ALIGN, socket_id);
2705         if (tz == NULL) {
2706                 ixgbe_tx_queue_release(txq);
2707                 return -ENOMEM;
2708         }
2709
2710         txq->nb_tx_desc = nb_desc;
2711         txq->tx_rs_thresh = tx_rs_thresh;
2712         txq->tx_free_thresh = tx_free_thresh;
2713         txq->pthresh = tx_conf->tx_thresh.pthresh;
2714         txq->hthresh = tx_conf->tx_thresh.hthresh;
2715         txq->wthresh = tx_conf->tx_thresh.wthresh;
2716         txq->queue_id = queue_idx;
2717         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2718                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2719         txq->port_id = dev->data->port_id;
2720         txq->offloads = offloads;
2721         txq->ops = &def_txq_ops;
2722         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2723 #ifdef RTE_LIBRTE_SECURITY
2724         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2725                         DEV_TX_OFFLOAD_SECURITY);
2726 #endif
2727
2728         /*
2729          * Modification to set VFTDT for virtual function if vf is detected
2730          */
2731         if (hw->mac.type == ixgbe_mac_82599_vf ||
2732             hw->mac.type == ixgbe_mac_X540_vf ||
2733             hw->mac.type == ixgbe_mac_X550_vf ||
2734             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2735             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2736                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2737         else
2738                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2739
2740         txq->tx_ring_phys_addr = tz->iova;
2741         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2742
2743         /* Allocate software ring */
2744         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2745                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2746                                 RTE_CACHE_LINE_SIZE, socket_id);
2747         if (txq->sw_ring == NULL) {
2748                 ixgbe_tx_queue_release(txq);
2749                 return -ENOMEM;
2750         }
2751         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2752                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2753
2754         /* set up vector or scalar TX function as appropriate */
2755         ixgbe_set_tx_function(dev, txq);
2756
2757         txq->ops->reset(txq);
2758
2759         dev->data->tx_queues[queue_idx] = txq;
2760
2761
2762         return 0;
2763 }
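
/*
 * Illustrative sketch (not part of the driver): a Tx queue setup call, made
 * through the generic ethdev API that reaches ixgbe_dev_tx_queue_setup()
 * above, whose thresholds satisfy the constraints documented in that
 * function (tx_rs_thresh divides nb_desc, tx_rs_thresh <= tx_free_thresh,
 * their sum fits in the ring, WTHRESH left at 0). All numeric values are
 * assumptions for illustration.
 */
static __rte_unused int
example_setup_tx_queue(uint16_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf = {
                .tx_rs_thresh = 32,     /* 512 % 32 == 0 and 32 <= tx_free_thresh */
                .tx_free_thresh = 32,   /* well below 512 - 3 */
        };

        /* 512 descriptors: a multiple of IXGBE_TXD_ALIGN, within min/max */
        return rte_eth_tx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id), &txconf);
}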
2764
2765 /**
2766  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2767  *
2768  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2769  * in the sw_rsc_ring is not set to NULL but rather points to the next
2770  * mbuf of this RSC aggregation (that has not been completed yet and still
2771  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2772  * will just free the first "nb_segs" segments of the cluster explicitly by
2773  * calling rte_pktmbuf_free_seg().
2774  *
2775  * @m scattered cluster head
2776  */
2777 static void __attribute__((cold))
2778 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2779 {
2780         uint16_t i, nb_segs = m->nb_segs;
2781         struct rte_mbuf *next_seg;
2782
2783         for (i = 0; i < nb_segs; i++) {
2784                 next_seg = m->next;
2785                 rte_pktmbuf_free_seg(m);
2786                 m = next_seg;
2787         }
2788 }
2789
2790 static void __attribute__((cold))
2791 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2792 {
2793         unsigned i;
2794
2795 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2796         /* SSE Vector driver has a different way of releasing mbufs. */
2797         if (rxq->rx_using_sse) {
2798                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2799                 return;
2800         }
2801 #endif
2802
2803         if (rxq->sw_ring != NULL) {
2804                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2805                         if (rxq->sw_ring[i].mbuf != NULL) {
2806                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2807                                 rxq->sw_ring[i].mbuf = NULL;
2808                         }
2809                 }
2810                 if (rxq->rx_nb_avail) {
2811                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2812                                 struct rte_mbuf *mb;
2813
2814                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2815                                 rte_pktmbuf_free_seg(mb);
2816                         }
2817                         rxq->rx_nb_avail = 0;
2818                 }
2819         }
2820
2821         if (rxq->sw_sc_ring)
2822                 for (i = 0; i < rxq->nb_rx_desc; i++)
2823                         if (rxq->sw_sc_ring[i].fbuf) {
2824                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2825                                 rxq->sw_sc_ring[i].fbuf = NULL;
2826                         }
2827 }
2828
2829 static void __attribute__((cold))
2830 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2831 {
2832         if (rxq != NULL) {
2833                 ixgbe_rx_queue_release_mbufs(rxq);
2834                 rte_free(rxq->sw_ring);
2835                 rte_free(rxq->sw_sc_ring);
2836                 rte_free(rxq);
2837         }
2838 }
2839
2840 void __attribute__((cold))
2841 ixgbe_dev_rx_queue_release(void *rxq)
2842 {
2843         ixgbe_rx_queue_release(rxq);
2844 }
2845
2846 /*
2847  * Check if Rx Burst Bulk Alloc function can be used.
2848  * Return
2849  *        0: the preconditions are satisfied and the bulk allocation function
2850  *           can be used.
2851  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2852  *           function must be used.
2853  */
2854 static inline int __attribute__((cold))
2855 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2856 {
2857         int ret = 0;
2858
2859         /*
2860          * Make sure the following pre-conditions are satisfied:
2861          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2862          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2863          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2864          * Scattered packets are not supported.  This should be checked
2865          * outside of this function.
2866          */
2867         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2868                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2869                              "rxq->rx_free_thresh=%d, "
2870                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2871                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2872                 ret = -EINVAL;
2873         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2874                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2875                              "rxq->rx_free_thresh=%d, "
2876                              "rxq->nb_rx_desc=%d",
2877                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2878                 ret = -EINVAL;
2879         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2880                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2881                              "rxq->nb_rx_desc=%d, "
2882                              "rxq->rx_free_thresh=%d",
2883                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2884                 ret = -EINVAL;
2885         }
2886
2887         return ret;
2888 }
2889
2890 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2891 static void __attribute__((cold))
2892 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2893 {
2894         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2895         unsigned i;
2896         uint16_t len = rxq->nb_rx_desc;
2897
2898         /*
2899          * By default, the Rx queue setup function allocates enough memory for
2900          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2901          * extra memory at the end of the descriptor ring to be zero'd out.
2902          */
2903         if (adapter->rx_bulk_alloc_allowed)
2904                 /* zero out extra memory */
2905                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2906
2907         /*
2908          * Zero out HW ring memory. Zero out extra memory at the end of
2909          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2910          * reads extra memory as zeros.
2911          */
2912         for (i = 0; i < len; i++) {
2913                 rxq->rx_ring[i] = zeroed_desc;
2914         }
2915
2916         /*
2917          * initialize extra software ring entries. Space for these extra
2918          * entries is always allocated
2919          */
2920         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2921         for (i = rxq->nb_rx_desc; i < len; ++i) {
2922                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2923         }
2924
2925         rxq->rx_nb_avail = 0;
2926         rxq->rx_next_avail = 0;
2927         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2928         rxq->rx_tail = 0;
2929         rxq->nb_rx_hold = 0;
2930         rxq->pkt_first_seg = NULL;
2931         rxq->pkt_last_seg = NULL;
2932
2933 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2934         rxq->rxrearm_start = 0;
2935         rxq->rxrearm_nb = 0;
2936 #endif
2937 }
2938
2939 static int
2940 ixgbe_is_vf(struct rte_eth_dev *dev)
2941 {
2942         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2943
2944         switch (hw->mac.type) {
2945         case ixgbe_mac_82599_vf:
2946         case ixgbe_mac_X540_vf:
2947         case ixgbe_mac_X550_vf:
2948         case ixgbe_mac_X550EM_x_vf:
2949         case ixgbe_mac_X550EM_a_vf:
2950                 return 1;
2951         default:
2952                 return 0;
2953         }
2954 }
2955
2956 uint64_t
2957 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2958 {
2959         uint64_t offloads = 0;
2960         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2961
2962         if (hw->mac.type != ixgbe_mac_82598EB)
2963                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2964
2965         return offloads;
2966 }
2967
2968 uint64_t
2969 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2970 {
2971         uint64_t offloads;
2972         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2973
2974         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2975                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2976                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2977                    DEV_RX_OFFLOAD_KEEP_CRC    |
2978                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2979                    DEV_RX_OFFLOAD_VLAN_FILTER |
2980                    DEV_RX_OFFLOAD_SCATTER |
2981                    DEV_RX_OFFLOAD_RSS_HASH;
2982
2983         if (hw->mac.type == ixgbe_mac_82598EB)
2984                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2985
2986         if (ixgbe_is_vf(dev) == 0)
2987                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2988
2989         /*
2990          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2991          * mode.
2992          */
2993         if ((hw->mac.type == ixgbe_mac_82599EB ||
2994              hw->mac.type == ixgbe_mac_X540 ||
2995              hw->mac.type == ixgbe_mac_X550) &&
2996             !RTE_ETH_DEV_SRIOV(dev).active)
2997                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2998
2999         if (hw->mac.type == ixgbe_mac_82599EB ||
3000             hw->mac.type == ixgbe_mac_X540)
3001                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3002
3003         if (hw->mac.type == ixgbe_mac_X550 ||
3004             hw->mac.type == ixgbe_mac_X550EM_x ||
3005             hw->mac.type == ixgbe_mac_X550EM_a)
3006                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3007
3008 #ifdef RTE_LIBRTE_SECURITY
3009         if (dev->security_ctx)
3010                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3011 #endif
3012
3013         return offloads;
3014 }
3015
3016 int __attribute__((cold))
3017 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3018                          uint16_t queue_idx,
3019                          uint16_t nb_desc,
3020                          unsigned int socket_id,
3021                          const struct rte_eth_rxconf *rx_conf,
3022                          struct rte_mempool *mp)
3023 {
3024         const struct rte_memzone *rz;
3025         struct ixgbe_rx_queue *rxq;
3026         struct ixgbe_hw     *hw;
3027         uint16_t len;
3028         struct ixgbe_adapter *adapter = dev->data->dev_private;
3029         uint64_t offloads;
3030
3031         PMD_INIT_FUNC_TRACE();
3032         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3033
3034         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3035
3036         /*
3037          * Validate number of receive descriptors.
3038          * It must not exceed hardware maximum, and must be multiple
3039          * of IXGBE_ALIGN.
3040          */
3041         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3042                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3043                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3044                 return -EINVAL;
3045         }
3046
3047         /* Free memory prior to re-allocation if needed... */
3048         if (dev->data->rx_queues[queue_idx] != NULL) {
3049                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3050                 dev->data->rx_queues[queue_idx] = NULL;
3051         }
3052
3053         /* First allocate the rx queue data structure */
3054         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3055                                  RTE_CACHE_LINE_SIZE, socket_id);
3056         if (rxq == NULL)
3057                 return -ENOMEM;
3058         rxq->mb_pool = mp;
3059         rxq->nb_rx_desc = nb_desc;
3060         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3061         rxq->queue_id = queue_idx;
3062         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3063                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3064         rxq->port_id = dev->data->port_id;
3065         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3066                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3067         else
3068                 rxq->crc_len = 0;
3069         rxq->drop_en = rx_conf->rx_drop_en;
3070         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3071         rxq->offloads = offloads;
3072
3073         /*
3074          * The packet type in RX descriptor is different for different NICs.
3075          * Some bits are used for x550 but reserved for other NICs.
3076          * So set different masks for different NICs.
3077          */
3078         if (hw->mac.type == ixgbe_mac_X550 ||
3079             hw->mac.type == ixgbe_mac_X550EM_x ||
3080             hw->mac.type == ixgbe_mac_X550EM_a ||
3081             hw->mac.type == ixgbe_mac_X550_vf ||
3082             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3083             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3084                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3085         else
3086                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3087
3088         /*
3089          * Allocate RX ring hardware descriptors. A memzone large enough to
3090          * handle the maximum ring size is allocated in order to allow for
3091          * resizing in later calls to the queue setup function.
3092          */
3093         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3094                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3095         if (rz == NULL) {
3096                 ixgbe_rx_queue_release(rxq);
3097                 return -ENOMEM;
3098         }
3099
3100         /*
3101          * Zero init all the descriptors in the ring.
3102          */
3103         memset(rz->addr, 0, RX_RING_SZ);
3104
3105         /*
3106          * Modified to setup VFRDT for Virtual Function
3107          */
3108         if (hw->mac.type == ixgbe_mac_82599_vf ||
3109             hw->mac.type == ixgbe_mac_X540_vf ||
3110             hw->mac.type == ixgbe_mac_X550_vf ||
3111             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3112             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3113                 rxq->rdt_reg_addr =
3114                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3115                 rxq->rdh_reg_addr =
3116                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3117         } else {
3118                 rxq->rdt_reg_addr =
3119                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3120                 rxq->rdh_reg_addr =
3121                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3122         }
3123
3124         rxq->rx_ring_phys_addr = rz->iova;
3125         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3126
3127         /*
3128          * Certain constraints must be met in order to use the bulk buffer
3129          * allocation Rx burst function. If any of the Rx queues doesn't meet them,
3130          * the feature should be disabled for the whole port.
3131          */
3132         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3133                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3134                                     "preconditions - canceling the feature for "
3135                                     "the whole port[%d]",
3136                              rxq->queue_id, rxq->port_id);
3137                 adapter->rx_bulk_alloc_allowed = false;
3138         }
3139
3140         /*
3141          * Allocate software ring. Allow for space at the end of the
3142          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3143          * function does not access an invalid memory region.
3144          */
3145         len = nb_desc;
3146         if (adapter->rx_bulk_alloc_allowed)
3147                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3148
3149         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3150                                           sizeof(struct ixgbe_rx_entry) * len,
3151                                           RTE_CACHE_LINE_SIZE, socket_id);
3152         if (!rxq->sw_ring) {
3153                 ixgbe_rx_queue_release(rxq);
3154                 return -ENOMEM;
3155         }
3156
3157         /*
3158          * Always allocate even if it's not going to be needed in order to
3159          * simplify the code.
3160          *
3161          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3162          * be requested in ixgbe_dev_rx_init(), which is called later from
3163          * dev_start() flow.
3164          */
3165         rxq->sw_sc_ring =
3166                 rte_zmalloc_socket("rxq->sw_sc_ring",
3167                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3168                                    RTE_CACHE_LINE_SIZE, socket_id);
3169         if (!rxq->sw_sc_ring) {
3170                 ixgbe_rx_queue_release(rxq);
3171                 return -ENOMEM;
3172         }
3173
3174         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3175                             "dma_addr=0x%"PRIx64,
3176                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3177                      rxq->rx_ring_phys_addr);
3178
3179         if (!rte_is_power_of_2(nb_desc)) {
3180                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3181                                     "preconditions - canceling the feature for "
3182                                     "the whole port[%d]",
3183                              rxq->queue_id, rxq->port_id);
3184                 adapter->rx_vec_allowed = false;
3185         } else
3186                 ixgbe_rxq_vec_setup(rxq);
3187
3188         dev->data->rx_queues[queue_idx] = rxq;
3189
3190         ixgbe_reset_rx_queue(adapter, rxq);
3191
3192         return 0;
3193 }
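
/*
 * Illustrative sketch (not part of the driver): an Rx queue setup call, made
 * through the generic ethdev API that reaches ixgbe_dev_rx_queue_setup()
 * above, with parameters that keep the bulk-allocation and vector Rx
 * preconditions satisfied (power-of-two ring size, rx_free_thresh >=
 * RTE_PMD_IXGBE_RX_MAX_BURST and a divisor of nb_desc). The mempool is
 * assumed to have been created by the caller; numeric values are
 * assumptions for illustration.
 */
static __rte_unused int
example_setup_rx_queue(uint16_t port_id, uint16_t queue_id,
                       struct rte_mempool *mb_pool)
{
        struct rte_eth_rxconf rxconf = {
                .rx_free_thresh = 32,   /* >= 32 and 512 % 32 == 0 */
        };

        return rte_eth_rx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id),
                                      &rxconf, mb_pool);
}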
3194
3195 uint32_t
3196 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3197 {
3198 #define IXGBE_RXQ_SCAN_INTERVAL 4
3199         volatile union ixgbe_adv_rx_desc *rxdp;
3200         struct ixgbe_rx_queue *rxq;
3201         uint32_t desc = 0;
3202
3203         rxq = dev->data->rx_queues[rx_queue_id];
3204         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3205
3206         while ((desc < rxq->nb_rx_desc) &&
3207                 (rxdp->wb.upper.status_error &
3208                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3209                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3210                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3211                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3212                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3213                                 desc - rxq->nb_rx_desc]);
3214         }
3215
3216         return desc;
3217 }
3218
3219 int
3220 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3221 {
3222         volatile union ixgbe_adv_rx_desc *rxdp;
3223         struct ixgbe_rx_queue *rxq = rx_queue;
3224         uint32_t desc;
3225
3226         if (unlikely(offset >= rxq->nb_rx_desc))
3227                 return 0;
3228         desc = rxq->rx_tail + offset;
3229         if (desc >= rxq->nb_rx_desc)
3230                 desc -= rxq->nb_rx_desc;
3231
3232         rxdp = &rxq->rx_ring[desc];
3233         return !!(rxdp->wb.upper.status_error &
3234                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3235 }
3236
3237 int
3238 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3239 {
3240         struct ixgbe_rx_queue *rxq = rx_queue;
3241         volatile uint32_t *status;
3242         uint32_t nb_hold, desc;
3243
3244         if (unlikely(offset >= rxq->nb_rx_desc))
3245                 return -EINVAL;
3246
3247 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3248         if (rxq->rx_using_sse)
3249                 nb_hold = rxq->rxrearm_nb;
3250         else
3251 #endif
3252                 nb_hold = rxq->nb_rx_hold;
3253         if (offset >= rxq->nb_rx_desc - nb_hold)
3254                 return RTE_ETH_RX_DESC_UNAVAIL;
3255
3256         desc = rxq->rx_tail + offset;
3257         if (desc >= rxq->nb_rx_desc)
3258                 desc -= rxq->nb_rx_desc;
3259
3260         status = &rxq->rx_ring[desc].wb.upper.status_error;
3261         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3262                 return RTE_ETH_RX_DESC_DONE;
3263
3264         return RTE_ETH_RX_DESC_AVAIL;
3265 }
3266
3267 int
3268 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3269 {
3270         struct ixgbe_tx_queue *txq = tx_queue;
3271         volatile uint32_t *status;
3272         uint32_t desc;
3273
3274         if (unlikely(offset >= txq->nb_tx_desc))
3275                 return -EINVAL;
3276
3277         desc = txq->tx_tail + offset;
3278         /* go to next desc that has the RS bit */
3279         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3280                 txq->tx_rs_thresh;
3281         if (desc >= txq->nb_tx_desc) {
3282                 desc -= txq->nb_tx_desc;
3283                 if (desc >= txq->nb_tx_desc)
3284                         desc -= txq->nb_tx_desc;
3285         }
3286
3287         status = &txq->tx_ring[desc].wb.status;
3288         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3289                 return RTE_ETH_TX_DESC_DONE;
3290
3291         return RTE_ETH_TX_DESC_FULL;
3292 }
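
/*
 * Illustrative sketch (not part of the driver): polling descriptor state
 * from an application through the generic ethdev wrappers that end up in
 * the two status callbacks above. The offsets used are assumptions for
 * illustration.
 */
static __rte_unused void
example_poll_descriptor_status(uint16_t port_id, uint16_t queue_id)
{
        /* Has the next descriptor past the Rx tail been written back? */
        if (rte_eth_rx_descriptor_status(port_id, queue_id, 0) ==
                        RTE_ETH_RX_DESC_DONE)
                printf("a packet is ready on Rx queue %u\n",
                       (unsigned int)queue_id);

        /* Is the descriptor 16 slots ahead of the Tx tail still in use? */
        if (rte_eth_tx_descriptor_status(port_id, queue_id, 16) ==
                        RTE_ETH_TX_DESC_FULL)
                printf("Tx queue %u is backing up\n",
                       (unsigned int)queue_id);
}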
3293
3294 /*
3295  * Set up link loopback for X540/X550 mode Tx->Rx.
3296  */
3297 static inline void __attribute__((cold))
3298 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3299 {
3300         uint32_t macc;
3301         PMD_INIT_FUNC_TRACE();
3302
3303         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3304
3305         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3306                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3307         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3308
3309         if (enable) {
3310                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3311                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3312                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3313                 macc |= IXGBE_MACC_FLU;
3314         } else {
3315                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3316                 macc &= ~IXGBE_MACC_FLU;
3317         }
3318
3319         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3320                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3321
3322         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3323 }
3324
3325 void __attribute__((cold))
3326 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3327 {
3328         unsigned i;
3329         struct ixgbe_adapter *adapter = dev->data->dev_private;
3330         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3331
3332         PMD_INIT_FUNC_TRACE();
3333
3334         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3335                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3336
3337                 if (txq != NULL) {
3338                         txq->ops->release_mbufs(txq);
3339                         txq->ops->reset(txq);
3340                 }
3341         }
3342
3343         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3344                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3345
3346                 if (rxq != NULL) {
3347                         ixgbe_rx_queue_release_mbufs(rxq);
3348                         ixgbe_reset_rx_queue(adapter, rxq);
3349                 }
3350         }
3351         /* If loopback mode was enabled, reconfigure the link accordingly */
3352         if (dev->data->dev_conf.lpbk_mode != 0) {
3353                 if (hw->mac.type == ixgbe_mac_X540 ||
3354                      hw->mac.type == ixgbe_mac_X550 ||
3355                      hw->mac.type == ixgbe_mac_X550EM_x ||
3356                      hw->mac.type == ixgbe_mac_X550EM_a)
3357                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3358         }
3359 }
3360
3361 void
3362 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3363 {
3364         unsigned i;
3365
3366         PMD_INIT_FUNC_TRACE();
3367
3368         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3369                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3370                 dev->data->rx_queues[i] = NULL;
3371         }
3372         dev->data->nb_rx_queues = 0;
3373
3374         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3375                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3376                 dev->data->tx_queues[i] = NULL;
3377         }
3378         dev->data->nb_tx_queues = 0;
3379 }
3380
3381 /*********************************************************************
3382  *
3383  *  Device RX/TX init functions
3384  *
3385  **********************************************************************/
3386
3387 /**
3388  * Receive Side Scaling (RSS)
3389  * See section 7.1.2.8 in the following document:
3390  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3391  *
3392  * Principles:
3393  * The source and destination IP addresses of the IP header and the source
3394  * and destination ports of TCP/UDP headers, if any, of received packets are
3395  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3396  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3397  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3398  * RSS output index which is used as the RX queue index where to store the
3399  * received packets.
3400  * The following output is supplied in the RX write-back descriptor:
3401  *     - 32-bit result of the Microsoft RSS hash function,
3402  *     - 4-bit RSS type field.
3403  */
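
/*
 * Illustrative sketch (not part of the driver): how the 7 hash LSBs select a
 * RETA entry, as described above. The redirection table contents would come
 * from the RETA registers; here a software copy is assumed for illustration.
 */
static __rte_unused uint8_t
example_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
        /* 7 LSBs of the 32-bit hash index the 128-entry redirection table */
        return reta[rss_hash & 0x7F];
}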
3404
3405 /*
3406  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3407  * Used as the default key.
3408  */
3409 static uint8_t rss_intel_key[40] = {
3410         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3411         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3412         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3413         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3414         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3415 };
3416
3417 static void
3418 ixgbe_rss_disable(struct rte_eth_dev *dev)
3419 {
3420         struct ixgbe_hw *hw;
3421         uint32_t mrqc;
3422         uint32_t mrqc_reg;
3423
3424         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3425         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3426         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3427         mrqc &= ~IXGBE_MRQC_RSSEN;
3428         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3429 }
3430
3431 static void
3432 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3433 {
3434         uint8_t  *hash_key;
3435         uint32_t mrqc;
3436         uint32_t rss_key;
3437         uint64_t rss_hf;
3438         uint16_t i;
3439         uint32_t mrqc_reg;
3440         uint32_t rssrk_reg;
3441
3442         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3443         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3444
3445         hash_key = rss_conf->rss_key;
3446         if (hash_key != NULL) {
3447                 /* Fill in RSS hash key */
3448                 for (i = 0; i < 10; i++) {
3449                         rss_key  = hash_key[(i * 4)];
3450                         rss_key |= hash_key[(i * 4) + 1] << 8;
3451                         rss_key |= hash_key[(i * 4) + 2] << 16;
3452                         rss_key |= hash_key[(i * 4) + 3] << 24;
3453                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3454                 }
3455         }
3456
3457         /* Set configured hashing protocols in MRQC register */
3458         rss_hf = rss_conf->rss_hf;
3459         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3460         if (rss_hf & ETH_RSS_IPV4)
3461                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3462         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3463                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3464         if (rss_hf & ETH_RSS_IPV6)
3465                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3466         if (rss_hf & ETH_RSS_IPV6_EX)
3467                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3468         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3469                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3470         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3471                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3472         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3473                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3474         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3475                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3476         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3477                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3478         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3479 }
3480
3481 int
3482 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3483                           struct rte_eth_rss_conf *rss_conf)
3484 {
3485         struct ixgbe_hw *hw;
3486         uint32_t mrqc;
3487         uint64_t rss_hf;
3488         uint32_t mrqc_reg;
3489
3490         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3491
3492         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3493                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3494                         "NIC.");
3495                 return -ENOTSUP;
3496         }
3497         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3498
3499         /*
3500          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3501          *     "RSS enabling cannot be done dynamically while it must be
3502          *      preceded by a software reset"
3503          * Before changing anything, first check that the update RSS operation
3504          * does not attempt to disable RSS, if RSS was enabled at
3505          * initialization time, or does not attempt to enable RSS, if RSS was
3506          * disabled at initialization time.
3507          */
3508         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3509         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3510         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3511                 if (rss_hf != 0) /* Enable RSS */
3512                         return -(EINVAL);
3513                 return 0; /* Nothing to do */
3514         }
3515         /* RSS enabled */
3516         if (rss_hf == 0) /* Disable RSS */
3517                 return -(EINVAL);
3518         ixgbe_hw_rss_hash_set(hw, rss_conf);
3519         return 0;
3520 }
3521
3522 int
3523 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3524                             struct rte_eth_rss_conf *rss_conf)
3525 {
3526         struct ixgbe_hw *hw;
3527         uint8_t *hash_key;
3528         uint32_t mrqc;
3529         uint32_t rss_key;
3530         uint64_t rss_hf;
3531         uint16_t i;
3532         uint32_t mrqc_reg;
3533         uint32_t rssrk_reg;
3534
3535         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3536         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3537         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3538         hash_key = rss_conf->rss_key;
3539         if (hash_key != NULL) {
3540                 /* Return RSS hash key */
3541                 for (i = 0; i < 10; i++) {
3542                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3543                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3544                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3545                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3546                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3547                 }
3548         }
3549
3550         /* Get RSS functions configured in MRQC register */
3551         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3552         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3553                 rss_conf->rss_hf = 0;
3554                 return 0;
3555         }
3556         rss_hf = 0;
3557         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3558                 rss_hf |= ETH_RSS_IPV4;
3559         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3560                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3561         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3562                 rss_hf |= ETH_RSS_IPV6;
3563         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3564                 rss_hf |= ETH_RSS_IPV6_EX;
3565         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3566                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3567         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3568                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3569         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3570                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3571         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3572                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3573         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3574                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3575         rss_conf->rss_hf = rss_hf;
3576         return 0;
3577 }
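
/*
 * Illustrative sketch (not part of the driver): updating the RSS hash
 * configuration from an application through the generic ethdev API that
 * calls into ixgbe_dev_rss_hash_update() above. The key handling and the
 * protocol selection are assumptions for illustration; all flags used are
 * part of IXGBE_RSS_OFFLOAD_ALL.
 */
static __rte_unused int
example_update_rss(uint16_t port_id, uint8_t *key_40_bytes)
{
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = key_40_bytes,        /* NULL keeps the current key */
                .rss_key_len = 40,              /* ixgbe uses a 40-byte key */
                .rss_hf = ETH_RSS_IPV4 |
                          ETH_RSS_NONFRAG_IPV4_TCP |
                          ETH_RSS_NONFRAG_IPV4_UDP,
        };

        return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
}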
3578
3579 static void
3580 ixgbe_rss_configure(struct rte_eth_dev *dev)
3581 {
3582         struct rte_eth_rss_conf rss_conf;
3583         struct ixgbe_adapter *adapter;
3584         struct ixgbe_hw *hw;
3585         uint32_t reta;
3586         uint16_t i;
3587         uint16_t j;
3588         uint16_t sp_reta_size;
3589         uint32_t reta_reg;
3590
3591         PMD_INIT_FUNC_TRACE();
3592         adapter = dev->data->dev_private;
3593         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3594
3595         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3596
3597         /*
3598          * Fill in redirection table
3599          * The byte-swap is needed because NIC registers are in
3600          * little-endian order.
3601          */
3602         if (adapter->rss_reta_updated == 0) {
3603                 reta = 0;
3604                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3605                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3606
3607                         if (j == dev->data->nb_rx_queues)
3608                                 j = 0;
3609                         reta = (reta << 8) | j;
3610                         if ((i & 3) == 3)
3611                                 IXGBE_WRITE_REG(hw, reta_reg,
3612                                                 rte_bswap32(reta));
3613                 }
3614         }
3615
3616         /*
3617          * Configure the RSS key and the RSS protocols used to compute
3618          * the RSS hash of input packets.
3619          */
3620         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3621         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3622                 ixgbe_rss_disable(dev);
3623                 return;
3624         }
3625         if (rss_conf.rss_key == NULL)
3626                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3627         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3628 }
3629
3630 #define NUM_VFTA_REGISTERS 128
3631 #define NIC_RX_BUFFER_SIZE 0x200
3632 #define X550_RX_BUFFER_SIZE 0x180
3633
3634 static void
3635 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3636 {
3637         struct rte_eth_vmdq_dcb_conf *cfg;
3638         struct ixgbe_hw *hw;
3639         enum rte_eth_nb_pools num_pools;
3640         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3641         uint16_t pbsize;
3642         uint8_t nb_tcs; /* number of traffic classes */
3643         int i;
3644
3645         PMD_INIT_FUNC_TRACE();
3646         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3647         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3648         num_pools = cfg->nb_queue_pools;
3649         /* Check we have a valid number of pools */
3650         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3651                 ixgbe_rss_disable(dev);
3652                 return;
3653         }
3654         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3655         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3656
3657         /*
3658          * RXPBSIZE
3659          * split rx buffer up into sections, each for 1 traffic class
3660          */
3661         switch (hw->mac.type) {
3662         case ixgbe_mac_X550:
3663         case ixgbe_mac_X550EM_x:
3664         case ixgbe_mac_X550EM_a:
3665                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3666                 break;
3667         default:
3668                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3669                 break;
3670         }
3671         for (i = 0; i < nb_tcs; i++) {
3672                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3673
3674                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3675                 /* clear 10 bits. */
3676                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3677                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3678         }
3679         /* zero alloc all unused TCs */
3680         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3681                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3682
3683                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3684                 /* clear 10 bits. */
3685                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3686         }
3687
3688         /* MRQC: enable vmdq and dcb */
3689         mrqc = (num_pools == ETH_16_POOLS) ?
3690                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3691         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3692
3693         /* PFVTCTL: turn on virtualisation and set the default pool */
3694         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3695         if (cfg->enable_default_pool) {
3696                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3697         } else {
3698                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3699         }
3700
3701         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3702
3703         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3704         queue_mapping = 0;
3705         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3706                 /*
3707                  * mapping is done with 3 bits per priority,
3708                  * so shift by i*3 each time
3709                  */
3710                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3711
3712         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3713
3714         /* RTRPCS: DCB related */
3715         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3716
3717         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3718         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3719         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3720         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3721
3722         /* VFTA - enable all vlan filters */
3723         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3724                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3725         }
3726
3727         /* VFRE: pool enabling for receive - 16 or 32 */
3728         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3729                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3730
3731         /*
3732          * MPSAR - allow pools to read specific mac addresses
3733          * In this case, all pools should be able to read from mac addr 0
3734          */
3735         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3736         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3737
3738         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3739         for (i = 0; i < cfg->nb_pool_maps; i++) {
3740                 /* set vlan id in VF register and set the valid bit */
3741                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3742                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3743                 /*
3744                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3745                  * pools, we only need to use the first half of the register
3746                  * i.e. bits 0-31
3747                  */
3748                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3749         }
3750 }
3751
3752 /**
3753  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3754  * @dev: pointer to eth_dev structure
3755  * @dcb_config: pointer to ixgbe_dcb_config structure
3756  */
3757 static void
3758 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3759                        struct ixgbe_dcb_config *dcb_config)
3760 {
3761         uint32_t reg;
3762         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3763
3764         PMD_INIT_FUNC_TRACE();
3765         if (hw->mac.type != ixgbe_mac_82598EB) {
3766                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3767                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3768                 reg |= IXGBE_RTTDCS_ARBDIS;
3769                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3770
3771                 /* Enable DCB for Tx with 8 TCs */
3772                 if (dcb_config->num_tcs.pg_tcs == 8) {
3773                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3774                 } else {
3775                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3776                 }
3777                 if (dcb_config->vt_mode)
3778                         reg |= IXGBE_MTQC_VT_ENA;
3779                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3780
3781                 /* Enable the Tx desc arbiter */
3782                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3783                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3784                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3785
3786                 /* Enable Security TX Buffer IFG for DCB */
3787                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3788                 reg |= IXGBE_SECTX_DCB;
3789                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3790         }
3791 }
3792
3793 /**
3794  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3795  * @dev: pointer to rte_eth_dev structure
3796  * @dcb_config: pointer to ixgbe_dcb_config structure
3797  */
3798 static void
3799 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3800                         struct ixgbe_dcb_config *dcb_config)
3801 {
3802         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3803                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3804         struct ixgbe_hw *hw =
3805                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3806
3807         PMD_INIT_FUNC_TRACE();
3808         if (hw->mac.type != ixgbe_mac_82598EB)
3809                 /*PF VF Transmit Enable*/
3810                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3811                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3812
3813         /* Configure general DCB TX parameters */
3814         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3815 }
3816
3817 static void
3818 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3819                         struct ixgbe_dcb_config *dcb_config)
3820 {
3821         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3822                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3823         struct ixgbe_dcb_tc_config *tc;
3824         uint8_t i, j;
3825
3826         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3827         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3828                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3829                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3830         } else {
3831                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3832                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3833         }
3834
3835         /* Initialize User Priority to Traffic Class mapping */
3836         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3837                 tc = &dcb_config->tc_config[j];
3838                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3839         }
3840
3841         /* User Priority to Traffic Class mapping */
3842         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3843                 j = vmdq_rx_conf->dcb_tc[i];
3844                 tc = &dcb_config->tc_config[j];
3845                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3846                                                 (uint8_t)(1 << i);
3847         }
3848 }
3849
3850 static void
3851 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3852                         struct ixgbe_dcb_config *dcb_config)
3853 {
3854         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3855                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3856         struct ixgbe_dcb_tc_config *tc;
3857         uint8_t i, j;
3858
3859         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3860         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3861                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3862                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3863         } else {
3864                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3865                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3866         }
3867
3868         /* Initialize User Priority to Traffic Class mapping */
3869         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3870                 tc = &dcb_config->tc_config[j];
3871                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3872         }
3873
3874         /* User Priority to Traffic Class mapping */
3875         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3876                 j = vmdq_tx_conf->dcb_tc[i];
3877                 tc = &dcb_config->tc_config[j];
3878                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3879                                                 (uint8_t)(1 << i);
3880         }
3881 }
3882
3883 static void
3884 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3885                 struct ixgbe_dcb_config *dcb_config)
3886 {
3887         struct rte_eth_dcb_rx_conf *rx_conf =
3888                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3889         struct ixgbe_dcb_tc_config *tc;
3890         uint8_t i, j;
3891
3892         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3893         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3894
3895         /* Initialize User Priority to Traffic Class mapping */
3896         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3897                 tc = &dcb_config->tc_config[j];
3898                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3899         }
3900
3901         /* User Priority to Traffic Class mapping */
3902         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3903                 j = rx_conf->dcb_tc[i];
3904                 tc = &dcb_config->tc_config[j];
3905                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3906                                                 (uint8_t)(1 << i);
3907         }
3908 }
3909
3910 static void
3911 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3912                 struct ixgbe_dcb_config *dcb_config)
3913 {
3914         struct rte_eth_dcb_tx_conf *tx_conf =
3915                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3916         struct ixgbe_dcb_tc_config *tc;
3917         uint8_t i, j;
3918
3919         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3920         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3921
3922         /* Initialize User Priority to Traffic Class mapping */
3923         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3924                 tc = &dcb_config->tc_config[j];
3925                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3926         }
3927
3928         /* User Priority to Traffic Class mapping */
3929         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3930                 j = tx_conf->dcb_tc[i];
3931                 tc = &dcb_config->tc_config[j];
3932                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3933                                                 (uint8_t)(1 << i);
3934         }
3935 }
3936
3937 /**
3938  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3939  * @dev: pointer to eth_dev structure
3940  * @dcb_config: pointer to ixgbe_dcb_config structure
3941  */
3942 static void
3943 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3944                        struct ixgbe_dcb_config *dcb_config)
3945 {
3946         uint32_t reg;
3947         uint32_t vlanctrl;
3948         uint8_t i;
3949         uint32_t q;
3950         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3951
3952         PMD_INIT_FUNC_TRACE();
3953         /*
3954          * Disable the arbiter before changing parameters
3955          * (always enable recycle mode; WSP)
3956          */
3957         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3958         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3959
3960         if (hw->mac.type != ixgbe_mac_82598EB) {
3961                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3962                 if (dcb_config->num_tcs.pg_tcs == 4) {
3963                         if (dcb_config->vt_mode)
3964                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3965                                         IXGBE_MRQC_VMDQRT4TCEN;
3966                         else {
3967                                 /* No matter whether the mode is DCB or
3968                                  * DCB_RSS, just set MRQE to RSSXTCEN; RSS
3969                                  * itself is controlled by RSS_FIELD
3970                                  */
3971                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3972                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3973                                         IXGBE_MRQC_RTRSS4TCEN;
3974                         }
3975                 }
3976                 if (dcb_config->num_tcs.pg_tcs == 8) {
3977                         if (dcb_config->vt_mode)
3978                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3979                                         IXGBE_MRQC_VMDQRT8TCEN;
3980                         else {
3981                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3982                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3983                                         IXGBE_MRQC_RTRSS8TCEN;
3984                         }
3985                 }
3986
3987                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3988
3989                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3990                         /* Disable drop for all queues in VMDQ mode*/
3991                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3992                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3993                                                 (IXGBE_QDE_WRITE |
3994                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3995                 } else {
3996                         /* Enable drop for all queues in SRIOV mode */
3997                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3998                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3999                                                 (IXGBE_QDE_WRITE |
4000                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4001                                                  IXGBE_QDE_ENABLE));
4002                 }
4003         }
4004
4005         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4006         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4007         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4008         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4009
4010         /* VFTA - enable all vlan filters */
4011         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4012                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4013         }
4014
4015         /*
4016          * Configure Rx packet plane (recycle mode; WSP) and
4017          * enable arbiter
4018          */
4019         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4020         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4021 }
4022
4023 static void
4024 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4025                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4026 {
4027         switch (hw->mac.type) {
4028         case ixgbe_mac_82598EB:
4029                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4030                 break;
4031         case ixgbe_mac_82599EB:
4032         case ixgbe_mac_X540:
4033         case ixgbe_mac_X550:
4034         case ixgbe_mac_X550EM_x:
4035         case ixgbe_mac_X550EM_a:
4036                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4037                                                   tsa, map);
4038                 break;
4039         default:
4040                 break;
4041         }
4042 }
4043
4044 static void
4045 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4046                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4047 {
4048         switch (hw->mac.type) {
4049         case ixgbe_mac_82598EB:
4050                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4051                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4052                 break;
4053         case ixgbe_mac_82599EB:
4054         case ixgbe_mac_X540:
4055         case ixgbe_mac_X550:
4056         case ixgbe_mac_X550EM_x:
4057         case ixgbe_mac_X550EM_a:
4058                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4059                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4060                 break;
4061         default:
4062                 break;
4063         }
4064 }
4065
4066 #define DCB_RX_CONFIG  1
4067 #define DCB_TX_CONFIG  1
4068 #define DCB_TX_PB      1024
4069 /**
4070  * ixgbe_dcb_hw_configure - Enable DCB and configure
4071  * general DCB parameters in both VT mode and non-VT mode
4072  * @dev: pointer to rte_eth_dev structure
4073  * @dcb_config: pointer to ixgbe_dcb_config structure
4074  */
4075 static int
4076 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4077                         struct ixgbe_dcb_config *dcb_config)
4078 {
4079         int     ret = 0;
4080         uint8_t i, pfc_en, nb_tcs;
4081         uint16_t pbsize, rx_buffer_size;
4082         uint8_t config_dcb_rx = 0;
4083         uint8_t config_dcb_tx = 0;
4084         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4085         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4086         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4087         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4088         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4089         struct ixgbe_dcb_tc_config *tc;
4090         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4091                 RTE_ETHER_CRC_LEN;
4092         struct ixgbe_hw *hw =
4093                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4094         struct ixgbe_bw_conf *bw_conf =
4095                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4096
4097         switch (dev->data->dev_conf.rxmode.mq_mode) {
4098         case ETH_MQ_RX_VMDQ_DCB:
4099                 dcb_config->vt_mode = true;
4100                 if (hw->mac.type != ixgbe_mac_82598EB) {
4101                         config_dcb_rx = DCB_RX_CONFIG;
4102                         /*
4103                          * Get DCB and VT RX configuration parameters
4104                          * from rte_eth_conf.
4105                          */
4106                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4107                         /* Configure general VMDQ and DCB RX parameters */
4108                         ixgbe_vmdq_dcb_configure(dev);
4109                 }
4110                 break;
4111         case ETH_MQ_RX_DCB:
4112         case ETH_MQ_RX_DCB_RSS:
4113                 dcb_config->vt_mode = false;
4114                 config_dcb_rx = DCB_RX_CONFIG;
4115                 /* Get DCB RX configuration parameters from rte_eth_conf */
4116                 ixgbe_dcb_rx_config(dev, dcb_config);
4117                 /* Configure general DCB RX parameters */
4118                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4119                 break;
4120         default:
4121                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4122                 break;
4123         }
4124         switch (dev->data->dev_conf.txmode.mq_mode) {
4125         case ETH_MQ_TX_VMDQ_DCB:
4126                 dcb_config->vt_mode = true;
4127                 config_dcb_tx = DCB_TX_CONFIG;
4128                 /* get DCB and VT TX configuration parameters
4129                  * from rte_eth_conf
4130                  */
4131                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4132                 /* Configure general VMDQ and DCB TX parameters */
4133                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4134                 break;
4135
4136         case ETH_MQ_TX_DCB:
4137                 dcb_config->vt_mode = false;
4138                 config_dcb_tx = DCB_TX_CONFIG;
4139                 /* Get DCB TX configuration parameters from rte_eth_conf */
4140                 ixgbe_dcb_tx_config(dev, dcb_config);
4141                 /* Configure general DCB TX parameters */
4142                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4143                 break;
4144         default:
4145                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4146                 break;
4147         }
4148
4149         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4150         /* Unpack map */
4151         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4152         if (nb_tcs == ETH_4_TCS) {
4153                 /* Avoid un-configured priority mapping to TC0 */
4154                 uint8_t j = 4;
4155                 uint8_t mask = 0xFF;
4156
4157                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4158                         mask = (uint8_t)(mask & (~(1 << map[i])));
4159                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4160                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4161                                 map[j++] = i;
4162                         mask >>= 1;
4163                 }
4164                 /* Re-configure 4 TCs BW */
4165                 for (i = 0; i < nb_tcs; i++) {
4166                         tc = &dcb_config->tc_config[i];
4167                         if (bw_conf->tc_num != nb_tcs)
4168                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4169                                         (uint8_t)(100 / nb_tcs);
4170                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4171                                                 (uint8_t)(100 / nb_tcs);
4172                 }
4173                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4174                         tc = &dcb_config->tc_config[i];
4175                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4176                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4177                 }
4178         } else {
4179                 /* Re-configure 8 TCs BW */
4180                 for (i = 0; i < nb_tcs; i++) {
4181                         tc = &dcb_config->tc_config[i];
4182                         if (bw_conf->tc_num != nb_tcs)
4183                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4184                                         (uint8_t)(100 / nb_tcs + (i & 1));
4185                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4186                                 (uint8_t)(100 / nb_tcs + (i & 1));
4187                 }
4188         }
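        /*
         * Worked arithmetic for the split above: with 4 TCs each class gets
         * 100 / 4 = 25 percent; with 8 TCs the integer share 100 / 8 = 12 is
         * padded with (i & 1), yielding 12, 13, 12, 13, ... so the eight
         * shares still add up to exactly 100 percent.
         */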
4189
4190         switch (hw->mac.type) {
4191         case ixgbe_mac_X550:
4192         case ixgbe_mac_X550EM_x:
4193         case ixgbe_mac_X550EM_a:
4194                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4195                 break;
4196         default:
4197                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4198                 break;
4199         }
4200
4201         if (config_dcb_rx) {
4202                 /* Set RX buffer size */
4203                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4204                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4205
4206                 for (i = 0; i < nb_tcs; i++) {
4207                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4208                 }
4209                 /* zero alloc all unused TCs */
4210                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4211                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4212                 }
4213         }
4214         if (config_dcb_tx) {
4215                 /* Only support an equally distributed
4216                  * Tx packet buffer strategy.
4217                  */
4218                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4219                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4220
4221                 for (i = 0; i < nb_tcs; i++) {
4222                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4223                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4224                 }
4225                 /* Clear unused TCs, if any, to zero buffer size */
4226                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4227                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4228                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4229                 }
4230         }
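        /*
         * A worked sizing example, assuming the usual header constants
         * (NIC_RX_BUFFER_SIZE = 0x200 KB, IXGBE_TXPBSIZE_MAX = 0x28000 and
         * IXGBE_TXPKT_SIZE_MAX = 0xA): with 8 TCs each RXPBSIZE(i) gets
         * 512 / 8 = 64 KB, each TXPBSIZE(i) gets 0x28000 / 8 = 20 KB, and
         * TXPBTHRESH(i) becomes 20480 / 1024 - 10 = 10.
         */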
4231
4232         /* Calculate traffic class credits */
4233         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4234                                 IXGBE_DCB_TX_CONFIG);
4235         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4236                                 IXGBE_DCB_RX_CONFIG);
4237
4238         if (config_dcb_rx) {
4239                 /* Unpack CEE standard containers */
4240                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4241                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4242                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4243                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4244                 /* Configure PG(ETS) RX */
4245                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4246         }
4247
4248         if (config_dcb_tx) {
4249                 /* Unpack CEE standard containers */
4250                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4251                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4252                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4253                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4254                 /* Configure PG(ETS) TX */
4255                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4256         }
4257
4258         /* Configure queue statistics registers */
4259         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4260
4261         /* Check if the PFC is supported */
4262         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4263                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4264                 for (i = 0; i < nb_tcs; i++) {
4265                         /*
4266                          * If the TC count is 8, the default high_water is
4267                          * 48 and the default low_water is 16.
4268                          */
4269                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4270                         hw->fc.low_water[i] = pbsize / 4;
4271                         /* Enable pfc for this TC */
4272                         tc = &dcb_config->tc_config[i];
4273                         tc->pfc = ixgbe_dcb_pfc_enabled;
4274                 }
4275                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4276                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4277                         pfc_en &= 0x0F;
4278                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4279         }
4280
4281         return ret;
4282 }
4283
4284 /**
4285  * ixgbe_configure_dcb - Configure DCB Hardware
4286  * @dev: pointer to rte_eth_dev
4287  */
4288 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4289 {
4290         struct ixgbe_dcb_config *dcb_cfg =
4291                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4292         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4293
4294         PMD_INIT_FUNC_TRACE();
4295
4296         /* check that the configured mq_mode is a DCB mode */
4297         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4298             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4299             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4300                 return;
4301
4302         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4303                 return;
4304
4305         /* Configure DCB hardware */
4306         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4307 }
4308
4309 /*
4310  * VMDq is only supported on 10 GbE NICs.
4311  */
4312 static void
4313 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4314 {
4315         struct rte_eth_vmdq_rx_conf *cfg;
4316         struct ixgbe_hw *hw;
4317         enum rte_eth_nb_pools num_pools;
4318         uint32_t mrqc, vt_ctl, vlanctrl;
4319         uint32_t vmolr = 0;
4320         int i;
4321
4322         PMD_INIT_FUNC_TRACE();
4323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4324         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4325         num_pools = cfg->nb_queue_pools;
4326
4327         ixgbe_rss_disable(dev);
4328
4329         /* MRQC: enable vmdq */
4330         mrqc = IXGBE_MRQC_VMDQEN;
4331         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4332
4333         /* PFVTCTL: turn on virtualisation and set the default pool */
4334         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4335         if (cfg->enable_default_pool)
4336                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4337         else
4338                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4339
4340         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4341
4342         for (i = 0; i < (int)num_pools; i++) {
4343                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4344                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4345         }
4346
4347         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4348         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4349         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4350         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4351
4352         /* VFTA - enable all vlan filters */
4353         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4354                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4355
4356         /* VFRE: enable receive for all pools (up to 64) */
4357         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4358         if (num_pools == ETH_64_POOLS)
4359                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4360
4361         /*
4362          * MPSAR - allow pools to read specific mac addresses
4363          * In this case, all pools should be able to read from mac addr 0
4364          */
4365         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4366         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4367
4368         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4369         for (i = 0; i < cfg->nb_pool_maps; i++) {
4370                 /* set vlan id in VF register and set the valid bit */
4371                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4372                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4373                 /*
4374                  * Put the allowed pools in the VFB reg. The 64-bit pool
4375                  * bitmap spans two 32-bit registers: write the low half
4376                  * when the upper 32 bits are clear, else the high half.
4377                  */
4378                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4379                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4380                                         (cfg->pool_map[i].pools & UINT32_MAX));
4381                 else
4382                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4383                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4384
4385         }
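        /*
         * An illustrative case: pools = 0x300000000 (pools 32 and 33) has a
         * non-zero upper half, so the bitmap is written to VLVFB(i * 2 + 1);
         * a bitmap such as 0x3 (pools 0 and 1) stays in VLVFB(i * 2).
         */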
4386
4387         /* PFDMA Tx General Switch Control: enable VMDq loopback if requested */
4388         if (cfg->enable_loop_back) {
4389                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4390                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4391                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4392         }
4393
4394         IXGBE_WRITE_FLUSH(hw);
4395 }
4396
4397 /*
4398  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4399  * @hw: pointer to hardware structure
4400  */
4401 static void
4402 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4403 {
4404         uint32_t reg;
4405         uint32_t q;
4406
4407         PMD_INIT_FUNC_TRACE();
4408         /* PF VF Transmit Enable */
4409         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4410         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4411
4412         /* Disable the Tx desc arbiter so that MTQC can be changed */
4413         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4414         reg |= IXGBE_RTTDCS_ARBDIS;
4415         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4416
4417         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4418         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4419
4420         /* Disable drop for all queues */
4421         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4422                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4423                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4424
4425         /* Enable the Tx desc arbiter */
4426         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4427         reg &= ~IXGBE_RTTDCS_ARBDIS;
4428         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4429
4430         IXGBE_WRITE_FLUSH(hw);
4431 }
4432
4433 static int __attribute__((cold))
4434 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4435 {
4436         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4437         uint64_t dma_addr;
4438         unsigned int i;
4439
4440         /* Initialize software ring entries */
4441         for (i = 0; i < rxq->nb_rx_desc; i++) {
4442                 volatile union ixgbe_adv_rx_desc *rxd;
4443                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4444
4445                 if (mbuf == NULL) {
4446                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4447                                      (unsigned) rxq->queue_id);
4448                         return -ENOMEM;
4449                 }
4450
4451                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4452                 mbuf->port = rxq->port_id;
4453
4454                 dma_addr =
4455                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4456                 rxd = &rxq->rx_ring[i];
4457                 rxd->read.hdr_addr = 0;
4458                 rxd->read.pkt_addr = dma_addr;
4459                 rxe[i].mbuf = mbuf;
4460         }
4461
4462         return 0;
4463 }
4464
4465 static int
4466 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4467 {
4468         struct ixgbe_hw *hw;
4469         uint32_t mrqc;
4470
4471         ixgbe_rss_configure(dev);
4472
4473         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4474
4475         /* MRQC: enable VF RSS */
4476         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4477         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4478         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4479         case ETH_64_POOLS:
4480                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4481                 break;
4482
4483         case ETH_32_POOLS:
4484                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4485                 break;
4486
4487         default:
4488                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4489                 return -EINVAL;
4490         }
4491
4492         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4493
4494         return 0;
4495 }
4496
4497 static int
4498 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4499 {
4500         struct ixgbe_hw *hw =
4501                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4502
4503         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4504         case ETH_64_POOLS:
4505                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4506                         IXGBE_MRQC_VMDQEN);
4507                 break;
4508
4509         case ETH_32_POOLS:
4510                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4511                         IXGBE_MRQC_VMDQRT4TCEN);
4512                 break;
4513
4514         case ETH_16_POOLS:
4515                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4516                         IXGBE_MRQC_VMDQRT8TCEN);
4517                 break;
4518         default:
4519                 PMD_INIT_LOG(ERR,
4520                         "invalid pool number in IOV mode");
4521                 break;
4522         }
4523         return 0;
4524 }
4525
4526 static int
4527 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4528 {
4529         struct ixgbe_hw *hw =
4530                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4531
4532         if (hw->mac.type == ixgbe_mac_82598EB)
4533                 return 0;
4534
4535         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4536                 /*
4537                  * SRIOV inactive scheme
4538                  * any DCB/RSS w/o VMDq multi-queue setting
4539                  */
4540                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4541                 case ETH_MQ_RX_RSS:
4542                 case ETH_MQ_RX_DCB_RSS:
4543                 case ETH_MQ_RX_VMDQ_RSS:
4544                         ixgbe_rss_configure(dev);
4545                         break;
4546
4547                 case ETH_MQ_RX_VMDQ_DCB:
4548                         ixgbe_vmdq_dcb_configure(dev);
4549                         break;
4550
4551                 case ETH_MQ_RX_VMDQ_ONLY:
4552                         ixgbe_vmdq_rx_hw_configure(dev);
4553                         break;
4554
4555                 case ETH_MQ_RX_NONE:
4556                 default:
4557                         /* if mq_mode is none, disable rss mode. */
4558                         ixgbe_rss_disable(dev);
4559                         break;
4560                 }
4561         } else {
4562                 /* SRIOV active scheme
4563                  * Support RSS together with SRIOV.
4564                  */
4565                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4566                 case ETH_MQ_RX_RSS:
4567                 case ETH_MQ_RX_VMDQ_RSS:
4568                         ixgbe_config_vf_rss(dev);
4569                         break;
4570                 case ETH_MQ_RX_VMDQ_DCB:
4571                 case ETH_MQ_RX_DCB:
4572                 /* In SRIOV, the configuration is the same as VMDq case */
4573                         ixgbe_vmdq_dcb_configure(dev);
4574                         break;
4575                 /* DCB/RSS together with SRIOV is not supported */
4576                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4577                 case ETH_MQ_RX_DCB_RSS:
4578                         PMD_INIT_LOG(ERR,
4579                                 "DCB/RSS is not supported with VMDq & SRIOV");
4580                         return -1;
4581                 default:
4582                         ixgbe_config_vf_default(dev);
4583                         break;
4584                 }
4585         }
4586
4587         return 0;
4588 }
4589
4590 static int
4591 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4592 {
4593         struct ixgbe_hw *hw =
4594                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4595         uint32_t mtqc;
4596         uint32_t rttdcs;
4597
4598         if (hw->mac.type == ixgbe_mac_82598EB)
4599                 return 0;
4600
4601         /* disable arbiter before setting MTQC */
4602         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4603         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4604         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4605
4606         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4607                 /*
4608                  * SRIOV inactive scheme
4609                  * any DCB w/o VMDq multi-queue setting
4610                  */
4611                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4612                         ixgbe_vmdq_tx_hw_configure(hw);
4613                 else {
4614                         mtqc = IXGBE_MTQC_64Q_1PB;
4615                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4616                 }
4617         } else {
4618                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4619
4620                 /*
4621                  * SRIOV active scheme
4622                  * FIXME if support DCB together with VMDq & SRIOV
4623                  */
4624                 case ETH_64_POOLS:
4625                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4626                         break;
4627                 case ETH_32_POOLS:
4628                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4629                         break;
4630                 case ETH_16_POOLS:
4631                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4632                                 IXGBE_MTQC_8TC_8TQ;
4633                         break;
4634                 default:
4635                         mtqc = IXGBE_MTQC_64Q_1PB;
4636                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4637                 }
4638                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4639         }
4640
4641         /* re-enable arbiter */
4642         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4643         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4644
4645         return 0;
4646 }
4647
4648 /**
4649  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4650  *
4651  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4652  * spec rev. 3.0 chapter 8.2.3.8.13.
4653  *
4654  * @pool Memory pool of the Rx queue
4655  */
4656 static inline uint32_t
4657 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4658 {
4659         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4660
4661         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4662         uint16_t maxdesc =
4663                 RTE_IPV4_MAX_PKT_LEN /
4664                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4665
4666         if (maxdesc >= 16)
4667                 return IXGBE_RSCCTL_MAXDESC_16;
4668         else if (maxdesc >= 8)
4669                 return IXGBE_RSCCTL_MAXDESC_8;
4670         else if (maxdesc >= 4)
4671                 return IXGBE_RSCCTL_MAXDESC_4;
4672         else
4673                 return IXGBE_RSCCTL_MAXDESC_1;
4674 }
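/*
 * A worked example, assuming a typical mempool with a 2 KB data room and
 * RTE_PKTMBUF_HEADROOM = 128: the usable buffer is 2048 - 128 = 1920 bytes,
 * so maxdesc = 65535 / 1920 = 34, which the ladder above caps at
 * IXGBE_RSCCTL_MAXDESC_16.
 */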
4675
4676 /**
4677  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4678  * interrupt
4679  *
4680  * (Taken from FreeBSD tree)
4681  * (yes this is all very magic and confusing :)
4682  *
4683  * @dev port handle
4684  * @entry the register array entry
4685  * @vector the MSIX vector for this queue
4686  * @type RX/TX/MISC
4687  */
4688 static void
4689 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4690 {
4691         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4692         u32 ivar, index;
4693
4694         vector |= IXGBE_IVAR_ALLOC_VAL;
4695
4696         switch (hw->mac.type) {
4697
4698         case ixgbe_mac_82598EB:
4699                 if (type == -1)
4700                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4701                 else
4702                         entry += (type * 64);
4703                 index = (entry >> 2) & 0x1F;
4704                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4705                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4706                 ivar |= (vector << (8 * (entry & 0x3)));
4707                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4708                 break;
4709
4710         case ixgbe_mac_82599EB:
4711         case ixgbe_mac_X540:
4712                 if (type == -1) { /* MISC IVAR */
4713                         index = (entry & 1) * 8;
4714                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4715                         ivar &= ~(0xFF << index);
4716                         ivar |= (vector << index);
4717                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4718                 } else {        /* RX/TX IVARS */
4719                         index = (16 * (entry & 1)) + (8 * type);
4720                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4721                         ivar &= ~(0xFF << index);
4722                         ivar |= (vector << index);
4723                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4724                 }
4725
4726                 break;
4727
4728         default:
4729                 break;
4730         }
4731 }
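/*
 * A worked example of the 82599/X540 arithmetic above: mapping RX queue 5
 * (entry = 5, type = 0) to MSI-X vector 3 yields index = 16 * (5 & 1) +
 * 8 * 0 = 16, so byte lane 2 of IVAR(5 >> 1) = IVAR(2) is rewritten with
 * vector 3 OR'ed with IXGBE_IVAR_ALLOC_VAL.
 */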
4732
4733 void __attribute__((cold))
4734 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4735 {
4736         uint16_t i, rx_using_sse;
4737         struct ixgbe_adapter *adapter = dev->data->dev_private;
4738
4739         /*
4740          * In order to allow Vector Rx there are a few configuration
4741          * conditions to be met and Rx Bulk Allocation should be allowed.
4742          */
4743         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4744             !adapter->rx_bulk_alloc_allowed) {
4745                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4746                                     "preconditions",
4747                              dev->data->port_id);
4748
4749                 adapter->rx_vec_allowed = false;
4750         }
4751
4752         /*
4753          * Initialize the appropriate LRO callback.
4754          *
4755          * If all queues satisfy the bulk allocation preconditions
4756          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4757          * Otherwise use a single allocation version.
4758          */
4759         if (dev->data->lro) {
4760                 if (adapter->rx_bulk_alloc_allowed) {
4761                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4762                                            "allocation version");
4763                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4764                 } else {
4765                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4766                                            "allocation version");
4767                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4768                 }
4769         } else if (dev->data->scattered_rx) {
4770                 /*
4771                  * Set the non-LRO scattered callback: there are Vector and
4772                  * single allocation versions.
4773                  */
4774                 if (adapter->rx_vec_allowed) {
4775                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4776                                             "callback (port=%d).",
4777                                      dev->data->port_id);
4778
4779                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4780                 } else if (adapter->rx_bulk_alloc_allowed) {
4781                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4782                                            "allocation callback (port=%d).",
4783                                      dev->data->port_id);
4784                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4785                 } else {
4786                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4787                                             "single allocation) "
4788                                             "Scattered Rx callback "
4789                                             "(port=%d).",
4790                                      dev->data->port_id);
4791
4792                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4793                 }
4794         /*
4795          * Below we set "simple" callbacks according to port/queues parameters.
4796          * If parameters allow we are going to choose between the following
4797          * callbacks:
4798          *    - Vector
4799          *    - Bulk Allocation
4800          *    - Single buffer allocation (the simplest one)
4801          */
4802         } else if (adapter->rx_vec_allowed) {
4803                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4804                                     "burst size is no less than %d (port=%d).",
4805                              RTE_IXGBE_DESCS_PER_LOOP,
4806                              dev->data->port_id);
4807
4808                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4809         } else if (adapter->rx_bulk_alloc_allowed) {
4810                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4811                                     "satisfied. Rx Burst Bulk Alloc function "
4812                                     "will be used on port=%d.",
4813                              dev->data->port_id);
4814
4815                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4816         } else {
4817                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4818                                     "satisfied, or Scattered Rx is requested "
4819                                     "(port=%d).",
4820                              dev->data->port_id);
4821
4822                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4823         }
4824
4825         /* Propagate information about RX function choice through all queues. */
4826
4827         rx_using_sse =
4828                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4829                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4830
4831         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4832                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4833
4834                 rxq->rx_using_sse = rx_using_sse;
4835 #ifdef RTE_LIBRTE_SECURITY
4836                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4837                                 DEV_RX_OFFLOAD_SECURITY);
4838 #endif
4839         }
4840 }
4841
4842 /**
4843  * ixgbe_set_rsc - configure RSC related port HW registers
4844  *
4845  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4846  * of 82599 Spec (x540 configuration is virtually the same).
4847  *
4848  * @dev port handle
4849  *
4850  * Returns 0 in case of success or a non-zero error code
4851  */
4852 static int
4853 ixgbe_set_rsc(struct rte_eth_dev *dev)
4854 {
4855         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4856         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4857         struct rte_eth_dev_info dev_info = { 0 };
4858         bool rsc_capable = false;
4859         uint16_t i;
4860         uint32_t rdrxctl;
4861         uint32_t rfctl;
4862
4863         /* Sanity check */
4864         dev->dev_ops->dev_infos_get(dev, &dev_info);
4865         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4866                 rsc_capable = true;
4867
4868         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4869                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4870                                    "support it");
4871                 return -EINVAL;
4872         }
4873
4874         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4875
4876         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4877              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4878                 /*
4879                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4880                  * 3.0 RSC configuration requires HW CRC stripping being
4881                  * enabled. If user requested both HW CRC stripping off
4882                  * and RSC on - return an error.
4883                  */
4884                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4885                                     "is disabled");
4886                 return -EINVAL;
4887         }
4888
4889         /* RFCTL configuration */
4890         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4891         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4892                 /*
4893                  * Since NFS packet coalescing is not supported, clear
4894                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4895                  * enabled.
4896                  */
4897                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4898                            IXGBE_RFCTL_NFSR_DIS);
4899         else
4900                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4901         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4902
4903         /* If LRO hasn't been requested - we are done here. */
4904         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4905                 return 0;
4906
4907         /* Set RDRXCTL.RSCACKC bit */
4908         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4909         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4910         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4911
4912         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4913         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4914                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4915                 uint32_t srrctl =
4916                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4917                 uint32_t rscctl =
4918                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4919                 uint32_t psrtype =
4920                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4921                 uint32_t eitr =
4922                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4923
4924                 /*
4925                  * ixgbe PMD doesn't support header-split at the moment.
4926                  *
4927                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4928                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4929                  * should be configured even if header split is not
4930                  * enabled. We will configure it to 128 bytes, following the
4931                  * recommendation in the spec.
4932                  */
4933                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4934                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4935                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4936
4937                 /*
4938                  * TODO: Consider setting the Receive Descriptor Minimum
4939                  * Threshold Size for an RSC case. This is not an obviously
4940                  * beneficial option but one worth considering...
4941                  */
4942
4943                 rscctl |= IXGBE_RSCCTL_RSCEN;
4944                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4945                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4946
4947                 /*
4948                  * RSC: Set ITR interval corresponding to 2K ints/s.
4949                  *
4950                  * Full-sized RSC aggregations for a 10Gb/s link will
4951                  * arrive at about a 20K aggregations/s rate.
4952                  *
4953                  * A 2K ints/s rate will cause only 10% of the
4954                  * aggregations to be closed due to interrupt timer
4955                  * expiration when streaming at wire speed.
4956                  *
4957                  * For a sparse streaming case this setting will yield
4958                  * at most 500us latency for a single RSC aggregation.
4959                  */
4960                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4961                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4962                 eitr |= IXGBE_EITR_CNT_WDIS;
4963
4964                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4965                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4966                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4967                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4968
4969                 /*
4970                  * RSC requires the mapping of the queue to the
4971                  * interrupt vector.
4972                  */
4973                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4974         }
4975
4976         dev->data->lro = 1;
4977
4978         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4979
4980         return 0;
4981 }
4982
4983 /*
4984  * Initializes Receive Unit.
4985  */
4986 int __attribute__((cold))
4987 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4988 {
4989         struct ixgbe_hw     *hw;
4990         struct ixgbe_rx_queue *rxq;
4991         uint64_t bus_addr;
4992         uint32_t rxctrl;
4993         uint32_t fctrl;
4994         uint32_t hlreg0;
4995         uint32_t maxfrs;
4996         uint32_t srrctl;
4997         uint32_t rdrxctl;
4998         uint32_t rxcsum;
4999         uint16_t buf_size;
5000         uint16_t i;
5001         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5002         int rc;
5003
5004         PMD_INIT_FUNC_TRACE();
5005         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5006
5007         /*
5008          * Make sure receives are disabled while setting
5009          * up the RX context (registers, descriptor rings, etc.).
5010          */
5011         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5012         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5013
5014         /* Enable receipt of broadcast frames */
5015         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5016         fctrl |= IXGBE_FCTRL_BAM;
5017         fctrl |= IXGBE_FCTRL_DPF;
5018         fctrl |= IXGBE_FCTRL_PMCF;
5019         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5020
5021         /*
5022          * Configure CRC stripping, if any.
5023          */
5024         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5025         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5026                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5027         else
5028                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5029
5030         /*
5031          * Configure jumbo frame support, if any.
5032          */
5033         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5034                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5035                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5036                 maxfrs &= 0x0000FFFF;
5037                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5038                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5039         } else
5040                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5041
5042         /*
5043          * If loopback mode is configured, set LPBK bit.
5044          */
5045         if (dev->data->dev_conf.lpbk_mode != 0) {
5046                 rc = ixgbe_check_supported_loopback_mode(dev);
5047                 if (rc < 0) {
5048                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5049                         return rc;
5050                 }
5051                 hlreg0 |= IXGBE_HLREG0_LPBK;
5052         } else {
5053                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5054         }
5055
5056         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5057
5058         /*
5059          * Assume no header split and no VLAN strip support
5060          * on any Rx queue first.
5061          */
5062         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5063         /* Setup RX queues */
5064         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5065                 rxq = dev->data->rx_queues[i];
5066
5067                 /*
5068                  * Reset crc_len in case it was changed after queue setup by a
5069                  * call to configure.
5070                  */
5071                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5072                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5073                 else
5074                         rxq->crc_len = 0;
5075
5076                 /* Setup the Base and Length of the Rx Descriptor Rings */
5077                 bus_addr = rxq->rx_ring_phys_addr;
5078                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5079                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5080                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5081                                 (uint32_t)(bus_addr >> 32));
5082                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5083                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5084                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5085                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5086
5087                 /* Configure the SRRCTL register */
5088                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5089
5090                 /* Drop packets when no descriptors are available, if enabled for this queue */
5091                 if (rxq->drop_en)
5092                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5093
5094                 /*
5095                  * Configure the RX buffer size in the BSIZEPACKET field of
5096                  * the SRRCTL register of the queue.
5097                  * The value is in 1 KB resolution. Valid values can be from
5098                  * 1 KB to 16 KB.
5099                  */
5100                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5101                         RTE_PKTMBUF_HEADROOM);
5102                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5103                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5104
5105                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5106
5107                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5108                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5109
5110                 /* Add double VLAN tag length when checking the buffer size */
5111                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5112                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5113                         dev->data->scattered_rx = 1;
5114                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5115                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5116         }
5117
5118         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5119                 dev->data->scattered_rx = 1;
5120
5121         /*
5122          * Device configured with multiple RX queues.
5123          */
5124         ixgbe_dev_mq_rx_configure(dev);
5125
5126         /*
5127          * Setup the Checksum Register.
5128          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5129          * Enable IP/L4 checksum computation by hardware if requested to do so.
5130          */
5131         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5132         rxcsum |= IXGBE_RXCSUM_PCSD;
5133         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5134                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5135         else
5136                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5137
5138         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5139
5140         if (hw->mac.type == ixgbe_mac_82599EB ||
5141             hw->mac.type == ixgbe_mac_X540) {
5142                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5143                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5144                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5145                 else
5146                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5147                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5148                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5149         }
5150
5151         rc = ixgbe_set_rsc(dev);
5152         if (rc)
5153                 return rc;
5154
5155         ixgbe_set_rx_function(dev);
5156
5157         return 0;
5158 }
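
/*
 * Illustrative sketch (not part of the driver): how the Rx buffer size used
 * above is derived and encoded.  The usable buffer is the mempool data room
 * minus the mbuf headroom; the SRRCTL BSIZEPACKET field keeps only whole
 * 1 KB units, so the effective size is rounded down before deciding whether
 * scattered Rx is needed for the configured maximum frame length (plus two
 * VLAN tags).  The helper name below is an example-only assumption.
 */
static inline uint16_t
example_effective_rx_buf_size(struct rte_mempool *mp)
{
        uint16_t buf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
                                       RTE_PKTMBUF_HEADROOM);

        /* Keep whole 1 KB units only, exactly as BSIZEPACKET does. */
        return (uint16_t)(((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
                           IXGBE_SRRCTL_BSIZEPKT_MASK) <<
                          IXGBE_SRRCTL_BSIZEPKT_SHIFT);
}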
5159
5160 /*
5161  * Initializes Transmit Unit.
5162  */
5163 void __attribute__((cold))
5164 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5165 {
5166         struct ixgbe_hw     *hw;
5167         struct ixgbe_tx_queue *txq;
5168         uint64_t bus_addr;
5169         uint32_t hlreg0;
5170         uint32_t txctrl;
5171         uint16_t i;
5172
5173         PMD_INIT_FUNC_TRACE();
5174         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5175
5176         /* Enable TX CRC (checksum offload requirement) and hw padding
5177          * (TSO requirement)
5178          */
5179         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5180         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5181         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5182
5183         /* Setup the Base and Length of the Tx Descriptor Rings */
5184         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5185                 txq = dev->data->tx_queues[i];
5186
5187                 bus_addr = txq->tx_ring_phys_addr;
5188                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5189                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5190                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5191                                 (uint32_t)(bus_addr >> 32));
5192                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5193                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5194                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5195                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5196                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5197
5198                 /*
5199                  * Disable Tx Head Writeback RO bit, since this hoses
5200                  * bookkeeping if things aren't delivered in order.
5201                  */
5202                 switch (hw->mac.type) {
5203                 case ixgbe_mac_82598EB:
5204                         txctrl = IXGBE_READ_REG(hw,
5205                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5206                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5207                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5208                                         txctrl);
5209                         break;
5210
5211                 case ixgbe_mac_82599EB:
5212                 case ixgbe_mac_X540:
5213                 case ixgbe_mac_X550:
5214                 case ixgbe_mac_X550EM_x:
5215                 case ixgbe_mac_X550EM_a:
5216                 default:
5217                         txctrl = IXGBE_READ_REG(hw,
5218                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5219                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5220                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5221                                         txctrl);
5222                         break;
5223                 }
5224         }
5225
5226         /* Device configured with multiple TX queues. */
5227         ixgbe_dev_mq_tx_configure(dev);
5228 }
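
/*
 * Illustrative sketch (not part of the driver): the descriptor ring base
 * programmed above is a 64-bit IOVA, while TDBAL/TDBAH (and RDBAL/RDBAH on
 * the Rx side) are pairs of 32-bit registers, so the address is split into
 * its low and high halves before being written.  Example-only helper.
 */
static inline void
example_split_ring_base(uint64_t bus_addr, uint32_t *lo, uint32_t *hi)
{
        *lo = (uint32_t)(bus_addr & 0x00000000ffffffffULL);
        *hi = (uint32_t)(bus_addr >> 32);
}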
5229
5230 /*
5231  * Check if requested loopback mode is supported
5232  */
5233 int
5234 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5235 {
5236         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5237
5238         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5239                 if (hw->mac.type == ixgbe_mac_82599EB ||
5240                      hw->mac.type == ixgbe_mac_X540 ||
5241                      hw->mac.type == ixgbe_mac_X550 ||
5242                      hw->mac.type == ixgbe_mac_X550EM_x ||
5243                      hw->mac.type == ixgbe_mac_X550EM_a)
5244                         return 0;
5245
5246         return -ENOTSUP;
5247 }
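
/*
 * Illustrative sketch (not part of the driver): from the application side,
 * Tx->Rx loopback is requested through the lpbk_mode field of
 * struct rte_eth_conf before rte_eth_dev_configure(); the check above then
 * rejects the request on MAC types that do not support it.  The helper name
 * and parameters below are example-only assumptions.
 */
static inline int
example_request_tx_rx_loopback(uint16_t port_id, uint16_t nb_rxq,
                               uint16_t nb_txq)
{
        struct rte_eth_conf conf;

        memset(&conf, 0, sizeof(conf));
        conf.lpbk_mode = IXGBE_LPBK_TX_RX;      /* Tx->Rx loopback */

        return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}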
5248
5249 /*
5250  * Set up link for 82599 loopback mode Tx->Rx.
5251  */
5252 static inline void __attribute__((cold))
5253 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5254 {
5255         PMD_INIT_FUNC_TRACE();
5256
5257         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5258                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5259                                 IXGBE_SUCCESS) {
5260                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5261                         /* ignore error */
5262                         return;
5263                 }
5264         }
5265
5266         /* Restart link */
5267         IXGBE_WRITE_REG(hw,
5268                         IXGBE_AUTOC,
5269                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5270         ixgbe_reset_pipeline_82599(hw);
5271
5272         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5273         msec_delay(50);
5274 }
5275
5276
5277 /*
5278  * Start Transmit and Receive Units.
5279  */
5280 int __attribute__((cold))
5281 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5282 {
5283         struct ixgbe_hw     *hw;
5284         struct ixgbe_tx_queue *txq;
5285         struct ixgbe_rx_queue *rxq;
5286         uint32_t txdctl;
5287         uint32_t dmatxctl;
5288         uint32_t rxctrl;
5289         uint16_t i;
5290         int ret = 0;
5291
5292         PMD_INIT_FUNC_TRACE();
5293         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5294
5295         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5296                 txq = dev->data->tx_queues[i];
5297                 /* Setup Transmit Threshold Registers */
5298                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5299                 txdctl |= txq->pthresh & 0x7F;
5300                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5301                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5302                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5303         }
5304
5305         if (hw->mac.type != ixgbe_mac_82598EB) {
5306                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5307                 dmatxctl |= IXGBE_DMATXCTL_TE;
5308                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5309         }
5310
5311         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5312                 txq = dev->data->tx_queues[i];
5313                 if (!txq->tx_deferred_start) {
5314                         ret = ixgbe_dev_tx_queue_start(dev, i);
5315                         if (ret < 0)
5316                                 return ret;
5317                 }
5318         }
5319
5320         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5321                 rxq = dev->data->rx_queues[i];
5322                 if (!rxq->rx_deferred_start) {
5323                         ret = ixgbe_dev_rx_queue_start(dev, i);
5324                         if (ret < 0)
5325                                 return ret;
5326                 }
5327         }
5328
5329         /* Enable Receive engine */
5330         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5331         if (hw->mac.type == ixgbe_mac_82598EB)
5332                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5333         rxctrl |= IXGBE_RXCTRL_RXEN;
5334         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5335
5336         /* If loopback mode is enabled, set up the link accordingly */
5337         if (dev->data->dev_conf.lpbk_mode != 0) {
5338                 if (hw->mac.type == ixgbe_mac_82599EB)
5339                         ixgbe_setup_loopback_link_82599(hw);
5340                 else if (hw->mac.type == ixgbe_mac_X540 ||
5341                      hw->mac.type == ixgbe_mac_X550 ||
5342                      hw->mac.type == ixgbe_mac_X550EM_x ||
5343                      hw->mac.type == ixgbe_mac_X550EM_a)
5344                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5345         }
5346
5347 #ifdef RTE_LIBRTE_SECURITY
5348         if ((dev->data->dev_conf.rxmode.offloads &
5349                         DEV_RX_OFFLOAD_SECURITY) ||
5350                 (dev->data->dev_conf.txmode.offloads &
5351                         DEV_TX_OFFLOAD_SECURITY)) {
5352                 ret = ixgbe_crypto_enable_ipsec(dev);
5353                 if (ret != 0) {
5354                         PMD_DRV_LOG(ERR,
5355                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5356                                     ret);
5357                         return ret;
5358                 }
5359         }
5360 #endif
5361
5362         return 0;
5363 }
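
/*
 * Illustrative sketch (not part of the driver): the prefetch, host and
 * write-back thresholds programmed above share a single TXDCTL register,
 * packed into bits 6:0, 14:8 and 22:16 respectively.  Example-only helper.
 */
static inline uint32_t
example_pack_txdctl_thresholds(uint8_t pthresh, uint8_t hthresh,
                               uint8_t wthresh)
{
        uint32_t txdctl = 0;

        txdctl |= pthresh & 0x7F;                    /* PTHRESH, bits 6:0 */
        txdctl |= (uint32_t)(hthresh & 0x7F) << 8;   /* HTHRESH, bits 14:8 */
        txdctl |= (uint32_t)(wthresh & 0x7F) << 16;  /* WTHRESH, bits 22:16 */

        return txdctl;
}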
5364
5365 /*
5366  * Start the Receive Unit for the specified queue.
5367  */
5368 int __attribute__((cold))
5369 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5370 {
5371         struct ixgbe_hw     *hw;
5372         struct ixgbe_rx_queue *rxq;
5373         uint32_t rxdctl;
5374         int poll_ms;
5375
5376         PMD_INIT_FUNC_TRACE();
5377         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5378
5379         rxq = dev->data->rx_queues[rx_queue_id];
5380
5381         /* Allocate buffers for descriptor rings */
5382         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5383                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5384                              rx_queue_id);
5385                 return -1;
5386         }
5387         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5388         rxdctl |= IXGBE_RXDCTL_ENABLE;
5389         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5390
5391         /* Wait until RX Enable ready */
5392         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5393         do {
5394                 rte_delay_ms(1);
5395                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5396         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5397         if (!poll_ms)
5398                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5399         rte_wmb();
5400         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5401         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5402         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5403
5404         return 0;
5405 }
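
/*
 * Illustrative sketch (not part of the driver): the queue start/stop paths
 * above all follow the same poll-with-timeout pattern, re-reading a register
 * roughly once per millisecond until a bit reaches the wanted state or the
 * budget runs out.  example_poll_bit() is an example-only helper.
 */
static inline int
example_poll_bit(struct ixgbe_hw *hw, uint32_t reg, uint32_t mask,
                 int wait_set, int timeout_ms)
{
        uint32_t val;

        do {
                rte_delay_ms(1);
                val = IXGBE_READ_REG(hw, reg);
                if (wait_set ? (val & mask) : !(val & mask))
                        return 0;
        } while (--timeout_ms);

        return -ETIMEDOUT;      /* callers above just log an error */
}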
5406
5407 /*
5408  * Stop the Receive Unit for the specified queue.
5409  */
5410 int __attribute__((cold))
5411 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5412 {
5413         struct ixgbe_hw     *hw;
5414         struct ixgbe_adapter *adapter = dev->data->dev_private;
5415         struct ixgbe_rx_queue *rxq;
5416         uint32_t rxdctl;
5417         int poll_ms;
5418
5419         PMD_INIT_FUNC_TRACE();
5420         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5421
5422         rxq = dev->data->rx_queues[rx_queue_id];
5423
5424         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5425         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5426         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5427
5428         /* Wait until the RX Enable bit is cleared */
5429         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5430         do {
5431                 rte_delay_ms(1);
5432                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5433         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5434         if (!poll_ms)
5435                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5436
5437         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5438
5439         ixgbe_rx_queue_release_mbufs(rxq);
5440         ixgbe_reset_rx_queue(adapter, rxq);
5441         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5442
5443         return 0;
5444 }
5445
5446
5447 /*
5448  * Start the Transmit Unit for the specified queue.
5449  */
5450 int __attribute__((cold))
5451 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5452 {
5453         struct ixgbe_hw     *hw;
5454         struct ixgbe_tx_queue *txq;
5455         uint32_t txdctl;
5456         int poll_ms;
5457
5458         PMD_INIT_FUNC_TRACE();
5459         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5460
5461         txq = dev->data->tx_queues[tx_queue_id];
5462         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5463         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5464         txdctl |= IXGBE_TXDCTL_ENABLE;
5465         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5466
5467         /* Wait until TX Enable ready */
5468         if (hw->mac.type == ixgbe_mac_82599EB) {
5469                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5470                 do {
5471                         rte_delay_ms(1);
5472                         txdctl = IXGBE_READ_REG(hw,
5473                                 IXGBE_TXDCTL(txq->reg_idx));
5474                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5475                 if (!poll_ms)
5476                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5477                                 tx_queue_id);
5478         }
5479         rte_wmb();
5480         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5481         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5482
5483         return 0;
5484 }
5485
5486 /*
5487  * Stop the Transmit Unit for the specified queue.
5488  */
5489 int __attribute__((cold))
5490 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5491 {
5492         struct ixgbe_hw     *hw;
5493         struct ixgbe_tx_queue *txq;
5494         uint32_t txdctl;
5495         uint32_t txtdh, txtdt;
5496         int poll_ms;
5497
5498         PMD_INIT_FUNC_TRACE();
5499         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5500
5501         txq = dev->data->tx_queues[tx_queue_id];
5502
5503         /* Wait until TX queue is empty */
5504         if (hw->mac.type == ixgbe_mac_82599EB) {
5505                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5506                 do {
5507                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5508                         txtdh = IXGBE_READ_REG(hw,
5509                                                IXGBE_TDH(txq->reg_idx));
5510                         txtdt = IXGBE_READ_REG(hw,
5511                                                IXGBE_TDT(txq->reg_idx));
5512                 } while (--poll_ms && (txtdh != txtdt));
5513                 if (!poll_ms)
5514                         PMD_INIT_LOG(ERR,
5515                                 "Tx Queue %d is not empty when stopping.",
5516                                 tx_queue_id);
5517         }
5518
5519         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5520         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5521         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5522
5523         /* Wait until the TX Enable bit is cleared */
5524         if (hw->mac.type == ixgbe_mac_82599EB) {
5525                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5526                 do {
5527                         rte_delay_ms(1);
5528                         txdctl = IXGBE_READ_REG(hw,
5529                                                 IXGBE_TXDCTL(txq->reg_idx));
5530                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5531                 if (!poll_ms)
5532                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5533                                 tx_queue_id);
5534         }
5535
5536         if (txq->ops != NULL) {
5537                 txq->ops->release_mbufs(txq);
5538                 txq->ops->reset(txq);
5539         }
5540         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5541
5542         return 0;
5543 }
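
/*
 * Illustrative sketch (not part of the driver): the "queue is empty" test
 * above considers the Tx ring drained once the hardware head pointer (TDH)
 * has caught up with the software tail pointer (TDT).  Example-only helper.
 */
static inline int
example_tx_ring_empty(struct ixgbe_hw *hw, uint16_t reg_idx)
{
        uint32_t head = IXGBE_READ_REG(hw, IXGBE_TDH(reg_idx));
        uint32_t tail = IXGBE_READ_REG(hw, IXGBE_TDT(reg_idx));

        return head == tail;
}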
5544
5545 void
5546 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5547         struct rte_eth_rxq_info *qinfo)
5548 {
5549         struct ixgbe_rx_queue *rxq;
5550
5551         rxq = dev->data->rx_queues[queue_id];
5552
5553         qinfo->mp = rxq->mb_pool;
5554         qinfo->scattered_rx = dev->data->scattered_rx;
5555         qinfo->nb_desc = rxq->nb_rx_desc;
5556
5557         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5558         qinfo->conf.rx_drop_en = rxq->drop_en;
5559         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5560         qinfo->conf.offloads = rxq->offloads;
5561 }
5562
5563 void
5564 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5565         struct rte_eth_txq_info *qinfo)
5566 {
5567         struct ixgbe_tx_queue *txq;
5568
5569         txq = dev->data->tx_queues[queue_id];
5570
5571         qinfo->nb_desc = txq->nb_tx_desc;
5572
5573         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5574         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5575         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5576
5577         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5578         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5579         qinfo->conf.offloads = txq->offloads;
5580         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5581 }
5582
5583 /*
5584  * [VF] Initializes Receive Unit.
5585  */
5586 int __attribute__((cold))
5587 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5588 {
5589         struct ixgbe_hw     *hw;
5590         struct ixgbe_rx_queue *rxq;
5591         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5592         uint64_t bus_addr;
5593         uint32_t srrctl, psrtype = 0;
5594         uint16_t buf_size;
5595         uint16_t i;
5596         int ret;
5597
5598         PMD_INIT_FUNC_TRACE();
5599         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5600
5601         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5602                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5603                         "it should be a power of 2");
5604                 return -1;
5605         }
5606
5607         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5608                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5609                         "it should be equal to or less than %d",
5610                         hw->mac.max_rx_queues);
5611                 return -1;
5612         }
5613
5614         /*
5615          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5616          * disables VF packet reception if the PF MTU is > 1500.
5617          * This is done to deal with the 82599 limitation that requires
5618          * the PF and all VFs to share the same MTU.
5619          * The PF driver re-enables VF packet reception only when the VF
5620          * driver issues an IXGBE_VF_SET_LPE request.
5621          * In the meantime, the VF device cannot be used, even if the VF driver
5622          * and the Guest VM network stack are ready to accept packets with a
5623          * size up to the PF MTU.
5624          * As a work-around to this PF behaviour, force the call to
5625          * ixgbevf_rlpml_set_vf even if jumbo frames are not used, so that
5626          * VF packet reception works in all cases.
5627          */
5628         ixgbevf_rlpml_set_vf(hw,
5629                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5630
5631         /*
5632          * Assume no header split and no VLAN strip support
5633          * on any Rx queue first.
5634          */
5635         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5636         /* Setup RX queues */
5637         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5638                 rxq = dev->data->rx_queues[i];
5639
5640                 /* Allocate buffers for descriptor rings */
5641                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5642                 if (ret)
5643                         return ret;
5644
5645                 /* Setup the Base and Length of the Rx Descriptor Rings */
5646                 bus_addr = rxq->rx_ring_phys_addr;
5647
5648                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5649                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5650                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5651                                 (uint32_t)(bus_addr >> 32));
5652                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5653                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5654                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5655                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5656
5657
5658                 /* Configure the SRRCTL register */
5659                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5660
5661                 /* Set if packets are to be dropped when no descriptors are available */
5662                 if (rxq->drop_en)
5663                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5664
5665                 /*
5666                  * Configure the RX buffer size in the BSIZEPACKET field of
5667                  * the SRRCTL register of the queue.
5668                  * The value is in 1 KB resolution. Valid values can be from
5669                  * 1 KB to 16 KB.
5670                  */
5671                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5672                         RTE_PKTMBUF_HEADROOM);
5673                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5674                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5675
5676                 /*
5677                  * VF modification to write virtual function SRRCTL register
5678                  */
5679                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5680
5681                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5682                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5683
5684                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5685                     /* Account for two VLAN tags (QinQ) in the frame size check */
5686                     (rxmode->max_rx_pkt_len +
5687                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5688                         if (!dev->data->scattered_rx)
5689                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5690                         dev->data->scattered_rx = 1;
5691                 }
5692
5693                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5694                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5695         }
5696
5697         /* Set RQPL for VF RSS according to the max number of Rx queues */
5698         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5699                 IXGBE_PSRTYPE_RQPL_SHIFT;
5700         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5701
5702         ixgbe_set_rx_function(dev);
5703
5704         return 0;
5705 }
5706
5707 /*
5708  * [VF] Initializes Transmit Unit.
5709  */
5710 void __attribute__((cold))
5711 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5712 {
5713         struct ixgbe_hw     *hw;
5714         struct ixgbe_tx_queue *txq;
5715         uint64_t bus_addr;
5716         uint32_t txctrl;
5717         uint16_t i;
5718
5719         PMD_INIT_FUNC_TRACE();
5720         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5721
5722         /* Setup the Base and Length of the Tx Descriptor Rings */
5723         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5724                 txq = dev->data->tx_queues[i];
5725                 bus_addr = txq->tx_ring_phys_addr;
5726                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5727                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5728                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5729                                 (uint32_t)(bus_addr >> 32));
5730                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5731                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5732                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5733                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5734                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5735
5736                 /*
5737                  * Disable Tx Head Writeback RO bit, since this hoses
5738                  * bookkeeping if things aren't delivered in order.
5739                  */
5740                 txctrl = IXGBE_READ_REG(hw,
5741                                 IXGBE_VFDCA_TXCTRL(i));
5742                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5743                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5744                                 txctrl);
5745         }
5746 }
5747
5748 /*
5749  * [VF] Start Transmit and Receive Units.
5750  */
5751 void __attribute__((cold))
5752 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5753 {
5754         struct ixgbe_hw     *hw;
5755         struct ixgbe_tx_queue *txq;
5756         struct ixgbe_rx_queue *rxq;
5757         uint32_t txdctl;
5758         uint32_t rxdctl;
5759         uint16_t i;
5760         int poll_ms;
5761
5762         PMD_INIT_FUNC_TRACE();
5763         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5764
5765         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5766                 txq = dev->data->tx_queues[i];
5767                 /* Setup Transmit Threshold Registers */
5768                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5769                 txdctl |= txq->pthresh & 0x7F;
5770                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5771                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5772                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5773         }
5774
5775         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5776
5777                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5778                 txdctl |= IXGBE_TXDCTL_ENABLE;
5779                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5780
5781                 poll_ms = 10;
5782                 /* Wait until TX Enable ready */
5783                 do {
5784                         rte_delay_ms(1);
5785                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5786                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5787                 if (!poll_ms)
5788                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5789         }
5790         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5791
5792                 rxq = dev->data->rx_queues[i];
5793
5794                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5795                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5796                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5797
5798                 /* Wait until RX Enable ready */
5799                 poll_ms = 10;
5800                 do {
5801                         rte_delay_ms(1);
5802                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5803                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5804                 if (!poll_ms)
5805                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5806                 rte_wmb();
5807                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5808
5809         }
5810 }
5811
5812 int
5813 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5814                     const struct rte_flow_action_rss *in)
5815 {
5816         if (in->key_len > RTE_DIM(out->key) ||
5817             in->queue_num > RTE_DIM(out->queue))
5818                 return -EINVAL;
5819         out->conf = (struct rte_flow_action_rss){
5820                 .func = in->func,
5821                 .level = in->level,
5822                 .types = in->types,
5823                 .key_len = in->key_len,
5824                 .queue_num = in->queue_num,
5825                 .key = memcpy(out->key, in->key, in->key_len),
5826                 .queue = memcpy(out->queue, in->queue,
5827                                 sizeof(*in->queue) * in->queue_num),
5828         };
5829         return 0;
5830 }
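
/*
 * Illustrative usage sketch (not part of the driver): ixgbe_rss_conf_init()
 * deep-copies a flow API RSS action into driver-owned storage so that the
 * key and queue arrays outlive the rte_flow_action_rss supplied by the
 * application.  The queue list, key and hash types below are example-only
 * assumptions.
 */
static inline int
example_store_rss_action(struct ixgbe_rte_flow_rss_conf *stored)
{
        static const uint16_t queues[] = { 0, 1, 2, 3 };
        static const uint8_t key[40] = { 0 };   /* all-zero example key */
        const struct rte_flow_action_rss action = {
                .types = ETH_RSS_IP,
                .key_len = sizeof(key),
                .key = key,
                .queue_num = RTE_DIM(queues),
                .queue = queues,
        };

        return ixgbe_rss_conf_init(stored, &action);
}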
5831
5832 int
5833 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5834                       const struct rte_flow_action_rss *with)
5835 {
5836         return (comp->func == with->func &&
5837                 comp->level == with->level &&
5838                 comp->types == with->types &&
5839                 comp->key_len == with->key_len &&
5840                 comp->queue_num == with->queue_num &&
5841                 !memcmp(comp->key, with->key, with->key_len) &&
5842                 !memcmp(comp->queue, with->queue,
5843                         sizeof(*with->queue) * with->queue_num));
5844 }
5845
5846 int
5847 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5848                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5849 {
5850         struct ixgbe_hw *hw;
5851         uint32_t reta;
5852         uint16_t i;
5853         uint16_t j;
5854         uint16_t sp_reta_size;
5855         uint32_t reta_reg;
5856         struct rte_eth_rss_conf rss_conf = {
5857                 .rss_key = conf->conf.key_len ?
5858                         (void *)(uintptr_t)conf->conf.key : NULL,
5859                 .rss_key_len = conf->conf.key_len,
5860                 .rss_hf = conf->conf.types,
5861         };
5862         struct ixgbe_filter_info *filter_info =
5863                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5864
5865         PMD_INIT_FUNC_TRACE();
5866         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5867
5868         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5869
5870         if (!add) {
5871                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5872                                           &conf->conf)) {
5873                         ixgbe_rss_disable(dev);
5874                         memset(&filter_info->rss_info, 0,
5875                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5876                         return 0;
5877                 }
5878                 return -EINVAL;
5879         }
5880
5881         if (filter_info->rss_info.conf.queue_num)
5882                 return -EINVAL;
5883         /* Fill in the redirection table.
5884          * The byte-swap is needed because NIC registers are in
5885          * little-endian order.
5886          */
5887         reta = 0;
5888         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5889                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5890
5891                 if (j == conf->conf.queue_num)
5892                         j = 0;
5893                 reta = (reta << 8) | conf->conf.queue[j];
5894                 if ((i & 3) == 3)
5895                         IXGBE_WRITE_REG(hw, reta_reg,
5896                                         rte_bswap32(reta));
5897         }
5898
5899         /* Configure the RSS key and the RSS protocols used to compute
5900          * the RSS hash of input packets.
5901          */
5902         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5903                 ixgbe_rss_disable(dev);
5904                 return 0;
5905         }
5906         if (rss_conf.rss_key == NULL)
5907                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5908         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5909
5910         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5911                 return -EINVAL;
5912
5913         return 0;
5914 }
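
/*
 * Illustrative sketch (not part of the driver): each 32-bit RETA register
 * holds four one-byte queue indices.  The loop above shifts entries in from
 * the right and writes a register after every fourth entry; rte_bswap32()
 * puts the bytes in the little-endian order the NIC expects.  Example-only
 * helper.
 */
static inline uint32_t
example_pack_reta_word(const uint16_t queues[4])
{
        uint32_t reta = 0;
        int k;

        for (k = 0; k < 4; k++)
                reta = (reta << 8) | (uint8_t)queues[k];

        return rte_bswap32(reta);
}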
5915
5916 /* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
5917 #if defined(RTE_ARCH_PPC_64)
5918 int
5919 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5920 {
5921         return -1;
5922 }
5923
5924 uint16_t
5925 ixgbe_recv_pkts_vec(
5926         void __rte_unused *rx_queue,
5927         struct rte_mbuf __rte_unused **rx_pkts,
5928         uint16_t __rte_unused nb_pkts)
5929 {
5930         return 0;
5931 }
5932
5933 uint16_t
5934 ixgbe_recv_scattered_pkts_vec(
5935         void __rte_unused *rx_queue,
5936         struct rte_mbuf __rte_unused **rx_pkts,
5937         uint16_t __rte_unused nb_pkts)
5938 {
5939         return 0;
5940 }
5941
5942 int
5943 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5944 {
5945         return -1;
5946 }
5947 #endif