dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_prefetch.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_ip.h>
44 #include <rte_net.h>
45
46 #include "ixgbe_logs.h"
47 #include "base/ixgbe_api.h"
48 #include "base/ixgbe_vf.h"
49 #include "ixgbe_ethdev.h"
50 #include "base/ixgbe_dcb.h"
51 #include "base/ixgbe_common.h"
52 #include "ixgbe_rxtx.h"
53
54 #ifdef RTE_LIBRTE_IEEE1588
55 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
56 #else
57 #define IXGBE_TX_IEEE1588_TMST 0
58 #endif
59 /* Bit mask indicating which offload flags require building a TX context descriptor */
60 #define IXGBE_TX_OFFLOAD_MASK (                  \
61                 PKT_TX_VLAN_PKT |                \
62                 PKT_TX_IP_CKSUM |                \
63                 PKT_TX_L4_MASK |                 \
64                 PKT_TX_TCP_SEG |                 \
65                 PKT_TX_MACSEC |                  \
66                 PKT_TX_OUTER_IP_CKSUM |          \
67                 PKT_TX_SEC_OFFLOAD |     \
68                 IXGBE_TX_IEEE1588_TMST)
69
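/* TX offload flags that are not supported by this driver;
 * ixgbe_prep_pkts() rejects packets that request any of them.
 */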
70 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
71                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
72
73 #if 1
74 #define RTE_PMD_USE_PREFETCH
75 #endif
76
77 #ifdef RTE_PMD_USE_PREFETCH
78 /*
79  * Prefetch a cache line into all cache levels.
80  */
81 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
82 #else
83 #define rte_ixgbe_prefetch(p)   do {} while (0)
84 #endif
85
86 #ifdef RTE_IXGBE_INC_VECTOR
87 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
88                                     uint16_t nb_pkts);
89 #endif
90
91 /*********************************************************************
92  *
93  *  TX functions
94  *
95  **********************************************************************/
96
97 /*
98  * Check for descriptors with their DD bit set and free mbufs.
99  * Return the total number of buffers freed.
100  */
101 static __rte_always_inline int
102 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
103 {
104         struct ixgbe_tx_entry *txep;
105         uint32_t status;
106         int i, nb_free = 0;
107         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
108
109         /* check DD bit on threshold descriptor */
110         status = txq->tx_ring[txq->tx_next_dd].wb.status;
111         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
112                 return 0;
113
114         /*
115          * first buffer to free from S/W ring is at index
116          * tx_next_dd - (tx_rs_thresh-1)
117          */
118         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
119
120         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
121                 /* free buffers one at a time */
122                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
123                 txep->mbuf = NULL;
124
125                 if (unlikely(m == NULL))
126                         continue;
127
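                /* Flush the local batch to its mempool when it is full or when
                 * the next mbuf belongs to a different mempool.
                 */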
128                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
129                     (nb_free > 0 && m->pool != free[0]->pool)) {
130                         rte_mempool_put_bulk(free[0]->pool,
131                                              (void **)free, nb_free);
132                         nb_free = 0;
133                 }
134
135                 free[nb_free++] = m;
136         }
137
138         if (nb_free > 0)
139                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
140
141         /* buffers were freed, update counters */
142         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
143         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
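        /* Wrap around: tx_rs_thresh is a divisor of the ring size, so the next
         * threshold descriptor after wrapping is at index tx_rs_thresh - 1.
         */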
144         if (txq->tx_next_dd >= txq->nb_tx_desc)
145                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
146
147         return txq->tx_rs_thresh;
148 }
149
150 /* Populate 4 descriptors with data from 4 mbufs */
151 static inline void
152 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
153 {
154         uint64_t buf_dma_addr;
155         uint32_t pkt_len;
156         int i;
157
158         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
159                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
160                 pkt_len = (*pkts)->data_len;
161
162                 /* write data to descriptor */
163                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
164
165                 txdp->read.cmd_type_len =
166                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
167
168                 txdp->read.olinfo_status =
169                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
170
171                 rte_prefetch0(&(*pkts)->pool);
172         }
173 }
174
175 /* Populate 1 descriptor with data from 1 mbuf */
176 static inline void
177 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
178 {
179         uint64_t buf_dma_addr;
180         uint32_t pkt_len;
181
182         buf_dma_addr = rte_mbuf_data_iova(*pkts);
183         pkt_len = (*pkts)->data_len;
184
185         /* write data to descriptor */
186         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187         txdp->read.cmd_type_len =
188                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
189         txdp->read.olinfo_status =
190                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
191         rte_prefetch0(&(*pkts)->pool);
192 }
193
194 /*
195  * Fill H/W descriptor ring with mbuf data.
196  * Copy mbuf pointers to the S/W ring.
197  */
198 static inline void
199 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
200                       uint16_t nb_pkts)
201 {
202         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
203         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
204         const int N_PER_LOOP = 4;
205         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
206         int mainpart, leftover;
207         int i, j;
208
209         /*
210          * Process most of the packets in chunks of N pkts.  Any
211          * leftover packets will get processed one at a time.
212          */
213         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
214         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
215         for (i = 0; i < mainpart; i += N_PER_LOOP) {
216                 /* Copy N mbuf pointers to the S/W ring */
217                 for (j = 0; j < N_PER_LOOP; ++j) {
218                         (txep + i + j)->mbuf = *(pkts + i + j);
219                 }
220                 tx4(txdp + i, pkts + i);
221         }
222
223         if (unlikely(leftover > 0)) {
224                 for (i = 0; i < leftover; ++i) {
225                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
226                         tx1(txdp + mainpart + i, pkts + mainpart + i);
227                 }
228         }
229 }
230
231 static inline uint16_t
232 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
233              uint16_t nb_pkts)
234 {
235         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
236         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
237         uint16_t n = 0;
238
239         /*
240          * Begin scanning the H/W ring for done descriptors when the
241          * number of available descriptors drops below tx_free_thresh.  For
242          * each done descriptor, free the associated buffer.
243          */
244         if (txq->nb_tx_free < txq->tx_free_thresh)
245                 ixgbe_tx_free_bufs(txq);
246
247         /* Only use descriptors that are available */
248         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
249         if (unlikely(nb_pkts == 0))
250                 return 0;
251
252         /* Use exactly nb_pkts descriptors */
253         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
254
255         /*
256          * At this point, we know there are enough descriptors in the
257          * ring to transmit all the packets.  This assumes that each
258          * mbuf contains a single segment, and that no new offloads
259          * are expected, which would require a new context descriptor.
260          */
261
262         /*
263          * See if we're going to wrap-around. If so, handle the top
264          * of the descriptor ring first, then do the bottom.  If not,
265          * the processing looks just like the "bottom" part anyway...
266          */
267         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
268                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
269                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
270
271                 /*
272                  * We know that the last descriptor in the ring will need to
273                  * have its RS bit set because tx_rs_thresh has to be
274                  * a divisor of the ring size
275                  */
276                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
277                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
278                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
279
280                 txq->tx_tail = 0;
281         }
282
283         /* Fill H/W descriptor ring with mbuf data */
284         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
285         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
286
287         /*
288          * Determine if RS bit should be set
289          * This is what we actually want:
290          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
291          * but instead of subtracting 1 and doing >=, we can just do
292          * greater than without subtracting.
293          */
294         if (txq->tx_tail > txq->tx_next_rs) {
295                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
296                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
297                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
298                                                 txq->tx_rs_thresh);
299                 if (txq->tx_next_rs >= txq->nb_tx_desc)
300                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
301         }
302
303         /*
304          * Check for wrap-around. This would only happen if we used
305          * up to the last descriptor in the ring, no more, no less.
306          */
307         if (txq->tx_tail >= txq->nb_tx_desc)
308                 txq->tx_tail = 0;
309
310         /* update tail pointer */
311         rte_wmb();
312         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
313
314         return nb_pkts;
315 }
316
317 uint16_t
318 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
319                        uint16_t nb_pkts)
320 {
321         uint16_t nb_tx;
322
323         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
324         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
325                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
326
327         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
328         nb_tx = 0;
329         while (nb_pkts) {
330                 uint16_t ret, n;
331
332                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
333                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
334                 nb_tx = (uint16_t)(nb_tx + ret);
335                 nb_pkts = (uint16_t)(nb_pkts - ret);
336                 if (ret < n)
337                         break;
338         }
339
340         return nb_tx;
341 }
342
343 #ifdef RTE_IXGBE_INC_VECTOR
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
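        /* The fixed-burst vector routine is called with at most tx_rs_thresh
         * packets at a time, so larger bursts are split into chunks of that
         * size.
         */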
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
365 #endif
366
367 static inline void
368 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
369                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
370                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
371                 __rte_unused uint64_t *mdata)
372 {
373         uint32_t type_tucmd_mlhl;
374         uint32_t mss_l4len_idx = 0;
375         uint32_t ctx_idx;
376         uint32_t vlan_macip_lens;
377         union ixgbe_tx_offload tx_offload_mask;
378         uint32_t seqnum_seed = 0;
379
380         ctx_idx = txq->ctx_curr;
381         tx_offload_mask.data[0] = 0;
382         tx_offload_mask.data[1] = 0;
383         type_tucmd_mlhl = 0;
384
385         /* Specify which HW CTX to upload. */
386         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
387
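        /* VLAN insertion requested: include vlan_tci in the fields that are
         * compared when deciding whether a cached context can be reused.
         */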
388         if (ol_flags & PKT_TX_VLAN_PKT) {
389                 tx_offload_mask.vlan_tci |= ~0;
390         }
391
392         /* check if TCP segmentation is required for this packet */
393         if (ol_flags & PKT_TX_TCP_SEG) {
394                 /* implies IP cksum in IPv4 */
395                 if (ol_flags & PKT_TX_IP_CKSUM)
396                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
397                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
398                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
399                 else
400                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
401                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
402                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
403
404                 tx_offload_mask.l2_len |= ~0;
405                 tx_offload_mask.l3_len |= ~0;
406                 tx_offload_mask.l4_len |= ~0;
407                 tx_offload_mask.tso_segsz |= ~0;
408                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
409                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
410         } else { /* no TSO, check if hardware checksum is needed */
411                 if (ol_flags & PKT_TX_IP_CKSUM) {
412                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
413                         tx_offload_mask.l2_len |= ~0;
414                         tx_offload_mask.l3_len |= ~0;
415                 }
416
417                 switch (ol_flags & PKT_TX_L4_MASK) {
418                 case PKT_TX_UDP_CKSUM:
419                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
420                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
421                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
429                         tx_offload_mask.l2_len |= ~0;
430                         tx_offload_mask.l3_len |= ~0;
431                         break;
432                 case PKT_TX_SCTP_CKSUM:
433                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
434                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
435                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
436                         tx_offload_mask.l2_len |= ~0;
437                         tx_offload_mask.l3_len |= ~0;
438                         break;
439                 default:
440                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
441                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
442                         break;
443                 }
444         }
445
446         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
447                 tx_offload_mask.outer_l2_len |= ~0;
448                 tx_offload_mask.outer_l3_len |= ~0;
449                 tx_offload_mask.l2_len |= ~0;
450                 seqnum_seed |= tx_offload.outer_l3_len
451                                << IXGBE_ADVTXD_OUTER_IPLEN;
452                 seqnum_seed |= tx_offload.l2_len
453                                << IXGBE_ADVTXD_TUNNEL_LEN;
454         }
455 #ifdef RTE_LIBRTE_SECURITY
456         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
457                 union ixgbe_crypto_tx_desc_md *md =
458                                 (union ixgbe_crypto_tx_desc_md *)mdata;
459                 seqnum_seed |=
460                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
461                 type_tucmd_mlhl |= md->enc ?
462                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
463                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
464                 type_tucmd_mlhl |=
465                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
466                 tx_offload_mask.sa_idx |= ~0;
467                 tx_offload_mask.sec_pad_len |= ~0;
468         }
469 #endif
470
471         txq->ctx_cache[ctx_idx].flags = ol_flags;
472         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
473                 tx_offload_mask.data[0] & tx_offload.data[0];
474         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
475                 tx_offload_mask.data[1] & tx_offload.data[1];
476         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
477
478         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
479         vlan_macip_lens = tx_offload.l3_len;
480         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
481                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
482                                     IXGBE_ADVTXD_MACLEN_SHIFT);
483         else
484                 vlan_macip_lens |= (tx_offload.l2_len <<
485                                     IXGBE_ADVTXD_MACLEN_SHIFT);
486         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
487         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
488         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
489         ctx_txd->seqnum_seed     = seqnum_seed;
490 }
491
492 /*
493  * Check which hardware context can be used. Use the existing match
494  * or create a new context descriptor.
495  */
496 static inline uint32_t
497 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
498                    union ixgbe_tx_offload tx_offload)
499 {
500         /* If it matches the currently used context */
501         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
502                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
503                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
504                      & tx_offload.data[0])) &&
505                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
506                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
507                      & tx_offload.data[1]))))
508                 return txq->ctx_curr;
509
510         /* Otherwise, check whether it matches the other cached context */
511         txq->ctx_curr ^= 1;
512         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
513                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
514                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
515                      & tx_offload.data[0])) &&
516                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
517                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
518                      & tx_offload.data[1]))))
519                 return txq->ctx_curr;
520
521         /* No match in either cached context: a new context descriptor is needed */
522         return IXGBE_CTX_NUM;
523 }
524
525 static inline uint32_t
526 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
527 {
528         uint32_t tmp = 0;
529
530         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
531                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
532         if (ol_flags & PKT_TX_IP_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
534         if (ol_flags & PKT_TX_TCP_SEG)
535                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
536         return tmp;
537 }
538
539 static inline uint32_t
540 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
541 {
542         uint32_t cmdtype = 0;
543
544         if (ol_flags & PKT_TX_VLAN_PKT)
545                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
548         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
549                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
550         if (ol_flags & PKT_TX_MACSEC)
551                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
552         return cmdtype;
553 }
554
555 /* Default RS bit threshold values */
556 #ifndef DEFAULT_TX_RS_THRESH
557 #define DEFAULT_TX_RS_THRESH   32
558 #endif
559 #ifndef DEFAULT_TX_FREE_THRESH
560 #define DEFAULT_TX_FREE_THRESH 32
561 #endif
562
563 /* Reset transmit descriptors after they have been used */
564 static inline int
565 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
566 {
567         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
568         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
569         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
570         uint16_t nb_tx_desc = txq->nb_tx_desc;
571         uint16_t desc_to_clean_to;
572         uint16_t nb_tx_to_clean;
573         uint32_t status;
574
575         /* Determine the last descriptor needing to be cleaned */
576         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
577         if (desc_to_clean_to >= nb_tx_desc)
578                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
579
580         /* Check to make sure the last descriptor to clean is done */
581         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
582         status = txr[desc_to_clean_to].wb.status;
583         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
584                 PMD_TX_FREE_LOG(DEBUG,
585                                 "TX descriptor %4u is not done "
586                                 "(port=%d queue=%d)",
587                                 desc_to_clean_to,
588                                 txq->port_id, txq->queue_id);
589                 /* Failed to clean any descriptors, better luck next time */
590                 return -(1);
591         }
592
593         /* Figure out how many descriptors will be cleaned */
594         if (last_desc_cleaned > desc_to_clean_to)
595                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
596                                                         desc_to_clean_to);
597         else
598                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
599                                                 last_desc_cleaned);
600
601         PMD_TX_FREE_LOG(DEBUG,
602                         "Cleaning %4u TX descriptors: %4u to %4u "
603                         "(port=%d queue=%d)",
604                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
605                         txq->port_id, txq->queue_id);
606
607         /*
608          * The last descriptor to clean is done, so that means all the
609          * descriptors from the last descriptor that was cleaned
610          * up to the last descriptor with the RS bit set
611          * are done. Only reset the threshold descriptor.
612          */
613         txr[desc_to_clean_to].wb.status = 0;
614
615         /* Update the txq to reflect the last descriptor that was cleaned */
616         txq->last_desc_cleaned = desc_to_clean_to;
617         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
618
619         /* No Error */
620         return 0;
621 }
622
623 uint16_t
624 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
625                 uint16_t nb_pkts)
626 {
627         struct ixgbe_tx_queue *txq;
628         struct ixgbe_tx_entry *sw_ring;
629         struct ixgbe_tx_entry *txe, *txn;
630         volatile union ixgbe_adv_tx_desc *txr;
631         volatile union ixgbe_adv_tx_desc *txd, *txp;
632         struct rte_mbuf     *tx_pkt;
633         struct rte_mbuf     *m_seg;
634         uint64_t buf_dma_addr;
635         uint32_t olinfo_status;
636         uint32_t cmd_type_len;
637         uint32_t pkt_len;
638         uint16_t slen;
639         uint64_t ol_flags;
640         uint16_t tx_id;
641         uint16_t tx_last;
642         uint16_t nb_tx;
643         uint16_t nb_used;
644         uint64_t tx_ol_req;
645         uint32_t ctx = 0;
646         uint32_t new_ctx;
647         union ixgbe_tx_offload tx_offload;
648 #ifdef RTE_LIBRTE_SECURITY
649         uint8_t use_ipsec;
650 #endif
651
652         tx_offload.data[0] = 0;
653         tx_offload.data[1] = 0;
654         txq = tx_queue;
655         sw_ring = txq->sw_ring;
656         txr     = txq->tx_ring;
657         tx_id   = txq->tx_tail;
658         txe = &sw_ring[tx_id];
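        /* txp remembers the previous packet's last data descriptor so that its
         * RS bit can be set later, either when the RS threshold is about to be
         * crossed or at the end of the burst.
         */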
659         txp = NULL;
660
661         /* Determine if the descriptor ring needs to be cleaned. */
662         if (txq->nb_tx_free < txq->tx_free_thresh)
663                 ixgbe_xmit_cleanup(txq);
664
665         rte_prefetch0(&txe->mbuf->pool);
666
667         /* TX loop */
668         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
669                 new_ctx = 0;
670                 tx_pkt = *tx_pkts++;
671                 pkt_len = tx_pkt->pkt_len;
672
673                 /*
674                  * Determine how many (if any) context descriptors
675                  * are needed for offload functionality.
676                  */
677                 ol_flags = tx_pkt->ol_flags;
678 #ifdef RTE_LIBRTE_SECURITY
679                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
680 #endif
681
682                 /* If hardware offload required */
683                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
684                 if (tx_ol_req) {
685                         tx_offload.l2_len = tx_pkt->l2_len;
686                         tx_offload.l3_len = tx_pkt->l3_len;
687                         tx_offload.l4_len = tx_pkt->l4_len;
688                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
689                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
690                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
691                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
692 #ifdef RTE_LIBRTE_SECURITY
693                         if (use_ipsec) {
694                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
695                                         (union ixgbe_crypto_tx_desc_md *)
696                                                         &tx_pkt->udata64;
697                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
698                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
699                         }
700 #endif
701
702                         /* Decide whether a new context must be built or an existing one can be reused. */
703                         ctx = what_advctx_update(txq, tx_ol_req,
704                                 tx_offload);
705                         /* Only allocate a context descriptor if required */
706                         new_ctx = (ctx == IXGBE_CTX_NUM);
707                         ctx = txq->ctx_curr;
708                 }
709
710                 /*
711                  * Keep track of how many descriptors are used this loop.
712                  * This will always be the number of segments plus the number
713                  * of context descriptors required to transmit the packet.
714                  */
715                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
716
717                 if (txp != NULL &&
718                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
719                         /* set RS on the previous packet in the burst */
720                         txp->read.cmd_type_len |=
721                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
722
723                 /*
724                  * The number of descriptors that must be allocated for a
725                  * packet is the number of segments of that packet, plus 1
726                  * Context Descriptor for the hardware offload, if any.
727                  * Determine the last TX descriptor to allocate in the TX ring
728                  * for the packet, starting from the current position (tx_id)
729                  * in the ring.
730                  */
731                 tx_last = (uint16_t) (tx_id + nb_used - 1);
732
733                 /* Circular ring */
734                 if (tx_last >= txq->nb_tx_desc)
735                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
736
737                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
738                            " tx_first=%u tx_last=%u",
739                            (unsigned) txq->port_id,
740                            (unsigned) txq->queue_id,
741                            (unsigned) pkt_len,
742                            (unsigned) tx_id,
743                            (unsigned) tx_last);
744
745                 /*
746                  * Make sure there are enough TX descriptors available to
747                  * transmit the entire packet.
748                  * nb_used better be less than or equal to txq->tx_rs_thresh
749                  */
750                 if (nb_used > txq->nb_tx_free) {
751                         PMD_TX_FREE_LOG(DEBUG,
752                                         "Not enough free TX descriptors "
753                                         "nb_used=%4u nb_free=%4u "
754                                         "(port=%d queue=%d)",
755                                         nb_used, txq->nb_tx_free,
756                                         txq->port_id, txq->queue_id);
757
758                         if (ixgbe_xmit_cleanup(txq) != 0) {
759                                 /* Could not clean any descriptors */
760                                 if (nb_tx == 0)
761                                         return 0;
762                                 goto end_of_tx;
763                         }
764
765                         /* nb_used better be <= txq->tx_rs_thresh */
766                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
767                                 PMD_TX_FREE_LOG(DEBUG,
768                                         "The number of descriptors needed to "
769                                         "transmit the packet exceeds the "
770                                         "RS bit threshold. This will impact "
771                                         "performance. "
772                                         "nb_used=%4u nb_free=%4u "
773                                         "tx_rs_thresh=%4u. "
774                                         "(port=%d queue=%d)",
775                                         nb_used, txq->nb_tx_free,
776                                         txq->tx_rs_thresh,
777                                         txq->port_id, txq->queue_id);
778                                 /*
779                                  * Loop here until there are enough TX
780                                  * descriptors or until the ring cannot be
781                                  * cleaned.
782                                  */
783                                 while (nb_used > txq->nb_tx_free) {
784                                         if (ixgbe_xmit_cleanup(txq) != 0) {
785                                                 /*
786                                                  * Could not clean any
787                                                  * descriptors
788                                                  */
789                                                 if (nb_tx == 0)
790                                                         return 0;
791                                                 goto end_of_tx;
792                                         }
793                                 }
794                         }
795                 }
796
797                 /*
798                  * By now there are enough free TX descriptors to transmit
799                  * the packet.
800                  */
801
802                 /*
803                  * Set common flags of all TX Data Descriptors.
804                  *
805                  * The following bits must be set in all Data Descriptors:
806                  *   - IXGBE_ADVTXD_DTYP_DATA
807                  *   - IXGBE_ADVTXD_DCMD_DEXT
808                  *
809                  * The following bits must be set in the first Data Descriptor
810                  * and are ignored in the other ones:
811                  *   - IXGBE_ADVTXD_DCMD_IFCS
812                  *   - IXGBE_ADVTXD_MAC_1588
813                  *   - IXGBE_ADVTXD_DCMD_VLE
814                  *
815                  * The following bits must only be set in the last Data
816                  * Descriptor:
817                  *   - IXGBE_TXD_CMD_EOP
818                  *
819                  * The following bits can be set in any Data Descriptor, but
820                  * are only set in the last Data Descriptor:
821                  *   - IXGBE_TXD_CMD_RS
822                  */
823                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
824                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
825
826 #ifdef RTE_LIBRTE_IEEE1588
827                 if (ol_flags & PKT_TX_IEEE1588_TMST)
828                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
829 #endif
830
831                 olinfo_status = 0;
832                 if (tx_ol_req) {
833
834                         if (ol_flags & PKT_TX_TCP_SEG) {
835                                 /* when TSO is on, the paylen in the descriptor
836                                  * is the TCP payload length, not the packet length */
837                                 pkt_len -= (tx_offload.l2_len +
838                                         tx_offload.l3_len + tx_offload.l4_len);
839                         }
840
841                         /*
842                          * Setup the TX Advanced Context Descriptor if required
843                          */
844                         if (new_ctx) {
845                                 volatile struct ixgbe_adv_tx_context_desc *
846                                     ctx_txd;
847
848                                 ctx_txd = (volatile struct
849                                     ixgbe_adv_tx_context_desc *)
850                                     &txr[tx_id];
851
852                                 txn = &sw_ring[txe->next_id];
853                                 rte_prefetch0(&txn->mbuf->pool);
854
855                                 if (txe->mbuf != NULL) {
856                                         rte_pktmbuf_free_seg(txe->mbuf);
857                                         txe->mbuf = NULL;
858                                 }
859
860                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
861                                         tx_offload, &tx_pkt->udata64);
862
863                                 txe->last_id = tx_last;
864                                 tx_id = txe->next_id;
865                                 txe = txn;
866                         }
867
868                         /*
869                          * Set up the TX Advanced Data Descriptor.
870                          * This path is taken whether the context
871                          * descriptor was newly built or reused.
872                          */
873                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
874                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
875                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
876                 }
877
878                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
879 #ifdef RTE_LIBRTE_SECURITY
880                 if (use_ipsec)
881                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
882 #endif
883
884                 m_seg = tx_pkt;
885                 do {
886                         txd = &txr[tx_id];
887                         txn = &sw_ring[txe->next_id];
888                         rte_prefetch0(&txn->mbuf->pool);
889
890                         if (txe->mbuf != NULL)
891                                 rte_pktmbuf_free_seg(txe->mbuf);
892                         txe->mbuf = m_seg;
893
894                         /*
895                          * Set up Transmit Data Descriptor.
896                          */
897                         slen = m_seg->data_len;
898                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
899                         txd->read.buffer_addr =
900                                 rte_cpu_to_le_64(buf_dma_addr);
901                         txd->read.cmd_type_len =
902                                 rte_cpu_to_le_32(cmd_type_len | slen);
903                         txd->read.olinfo_status =
904                                 rte_cpu_to_le_32(olinfo_status);
905                         txe->last_id = tx_last;
906                         tx_id = txe->next_id;
907                         txe = txn;
908                         m_seg = m_seg->next;
909                 } while (m_seg != NULL);
910
911                 /*
912                  * The last packet data descriptor needs End Of Packet (EOP)
913                  */
914                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
915                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
916                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
917
918                 /* Set RS bit only on threshold packets' last descriptor */
919                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
920                         PMD_TX_FREE_LOG(DEBUG,
921                                         "Setting RS bit on TXD id="
922                                         "%4u (port=%d queue=%d)",
923                                         tx_last, txq->port_id, txq->queue_id);
924
925                         cmd_type_len |= IXGBE_TXD_CMD_RS;
926
927                         /* Update txq RS bit counters */
928                         txq->nb_tx_used = 0;
929                         txp = NULL;
930                 } else
931                         txp = txd;
932
933                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
934         }
935
936 end_of_tx:
937         /* set RS on last packet in the burst */
938         if (txp != NULL)
939                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
940
941         rte_wmb();
942
943         /*
944          * Set the Transmit Descriptor Tail (TDT)
945          */
946         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
947                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
948                    (unsigned) tx_id, (unsigned) nb_tx);
949         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
950         txq->tx_tail = tx_id;
951
952         return nb_tx;
953 }
954
955 /*********************************************************************
956  *
957  *  TX prep functions
958  *
959  **********************************************************************/
960 uint16_t
961 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
962 {
963         int i, ret;
964         uint64_t ol_flags;
965         struct rte_mbuf *m;
966         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
967
968         for (i = 0; i < nb_pkts; i++) {
969                 m = tx_pkts[i];
970                 ol_flags = m->ol_flags;
971
972                 /**
973                  * Check whether the packet meets the limit on the number of segments
974                  *
975                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
976                  *       non-TSO
977                  */
978
979                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
980                         rte_errno = -EINVAL;
981                         return i;
982                 }
983
984                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
985                         rte_errno = -ENOTSUP;
986                         return i;
987                 }
988
989 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
990                 ret = rte_validate_tx_offload(m);
991                 if (ret != 0) {
992                         rte_errno = ret;
993                         return i;
994                 }
995 #endif
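                /* Fix up the packet contents (e.g. pseudo-header checksums) as
                 * required by the hardware checksum/TSO offloads.
                 */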
996                 ret = rte_net_intel_cksum_prepare(m);
997                 if (ret != 0) {
998                         rte_errno = ret;
999                         return i;
1000                 }
1001         }
1002
1003         return i;
1004 }
1005
1006 /*********************************************************************
1007  *
1008  *  RX functions
1009  *
1010  **********************************************************************/
1011
1012 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1013 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1014 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1015 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1016 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1017 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1018 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1019 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1020 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1021 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1022 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1023 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1024 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1025 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1026 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1027 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1028 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1029 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1030 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1033 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1034 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1045
1046 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1047 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1048 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1069
1070 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1071 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1072 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1093
1094 /**
1095  * Use two different tables for normal packets and tunnel packets
1096  * to save space.
1097  */
1098 const uint32_t
1099         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1100         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1101         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1102                 RTE_PTYPE_L3_IPV4,
1103         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1104                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1105         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1106                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1107         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1108                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1109         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4_EXT,
1111         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1113         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1115         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1117         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV6,
1119         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1121         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1123         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1125         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6_EXT,
1127         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1129         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1131         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1133         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1135                 RTE_PTYPE_INNER_L3_IPV6,
1136         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1138                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1139         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1141                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1145         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6,
1148         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1151         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1157         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1160         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1163         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1169         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1172         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1175         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1179                 RTE_PTYPE_L2_ETHER |
1180                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1181                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1182 };
1183
1184 const uint32_t
1185         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1186         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1187                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1188                 RTE_PTYPE_INNER_L2_ETHER,
1189         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1190                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1191                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1192         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1193                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1195         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1210                 RTE_PTYPE_INNER_L4_TCP,
1211         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1212                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1213                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1214                 RTE_PTYPE_INNER_L4_TCP,
1215         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1216                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1217                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1218         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1219                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1220                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1221                 RTE_PTYPE_INNER_L4_TCP,
1222         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1223                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1224                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1225                 RTE_PTYPE_INNER_L3_IPV4,
1226         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1227                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1228                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1229                 RTE_PTYPE_INNER_L4_UDP,
1230         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1231                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1232                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1233                 RTE_PTYPE_INNER_L4_UDP,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1237                 RTE_PTYPE_INNER_L4_SCTP,
1238         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1239                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1240                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1241         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1242                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1243                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1244                 RTE_PTYPE_INNER_L4_UDP,
1245         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1246                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1247                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1248                 RTE_PTYPE_INNER_L4_SCTP,
1249         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1250                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1251                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1252                 RTE_PTYPE_INNER_L3_IPV4,
1253         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1254                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1255                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1256                 RTE_PTYPE_INNER_L4_SCTP,
1257         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1258                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1259                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1260                 RTE_PTYPE_INNER_L4_SCTP,
1261         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1262                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1263                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1264                 RTE_PTYPE_INNER_L4_TCP,
1265         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1266                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1267                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1268                 RTE_PTYPE_INNER_L4_UDP,
1269
1270         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1272                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1273         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1274                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                 RTE_PTYPE_INNER_L3_IPV4,
1277         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1278                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1281         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1282                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                 RTE_PTYPE_INNER_L3_IPV6,
1285         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1286                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1287                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1288                 RTE_PTYPE_INNER_L3_IPV4,
1289         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1290                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1293         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1294                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                 RTE_PTYPE_INNER_L3_IPV4,
1297         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1298                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1301         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1302                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1305         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1306                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                 RTE_PTYPE_INNER_L3_IPV4,
1309         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1310                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1313         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1314                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1318                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1321         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1322                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1325         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1326                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1329         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1330                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                 RTE_PTYPE_INNER_L3_IPV4,
1333         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1334                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1335                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1336                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1337         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1338                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1339                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1340                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1341         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1342                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1343                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1344                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1345         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1346                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1347                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1348                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1349         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1350                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1351                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1352                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1353         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1354                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1355                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1356                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1357         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1358                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1359                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1360                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1361 };
1362
1363 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1364 static inline uint32_t
1365 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1366 {
1367
1368         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1369                 return RTE_PTYPE_UNKNOWN;
1370
1371         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1372
1373         /* For tunnel packet */
1374         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1375                 /* Remove the tunnel bit to save the space. */
1376                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1377                 return ptype_table_tn[pkt_info];
1378         }
1379
1380         /**
1381          * For x550, if it's not tunnel,
1382          * tunnel type bit should be set to 0.
1383          * Reuse 82599's mask.
1384          */
1385         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1386
1387         return ptype_table[pkt_info];
1388 }
1389
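/*
 * Illustrative sketch (not part of the driver): one way an application
 * could consume the packet_type value produced by the lookup above.
 * The helper name and counters are hypothetical; the RTE_PTYPE_* masks
 * and values are real DPDK definitions.
 */
static inline void
example_count_tunnel_ptypes(struct rte_mbuf **pkts, uint16_t nb,
			    uint64_t *nb_gre, uint64_t *nb_vxlan)
{
	uint16_t i;

	for (i = 0; i < nb; i++) {
		uint32_t tun = pkts[i]->packet_type & RTE_PTYPE_TUNNEL_MASK;

		if (tun == RTE_PTYPE_TUNNEL_GRE)
			(*nb_gre)++;
		else if (tun == RTE_PTYPE_TUNNEL_VXLAN)
			(*nb_vxlan)++;
	}
}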
1390 static inline uint64_t
1391 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1392 {
1393         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1394                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1395                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1396                 PKT_RX_RSS_HASH, 0, 0, 0,
1397                 0, 0, 0,  PKT_RX_FDIR,
1398         };
1399 #ifdef RTE_LIBRTE_IEEE1588
1400         static uint64_t ip_pkt_etqf_map[8] = {
1401                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1402                 0, 0, 0, 0,
1403         };
1404
1405         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1406                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1407                                 ip_rss_types_map[pkt_info & 0xF];
1408         else
1409                 return ip_rss_types_map[pkt_info & 0xF];
1410 #else
1411         return ip_rss_types_map[pkt_info & 0xF];
1412 #endif
1413 }
1414
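/*
 * Illustrative sketch (not part of the driver): reading the RSS or flow
 * director hash that the flag mapping above makes available on the mbuf.
 * The helper name is hypothetical; the ol_flags bits and mbuf fields are
 * real DPDK definitions.
 */
static inline uint32_t
example_pkt_hash_or_zero(const struct rte_mbuf *m)
{
	if (m->ol_flags & PKT_RX_RSS_HASH)
		return m->hash.rss;		/* RSS hash computed by the NIC */
	if (m->ol_flags & PKT_RX_FDIR)
		return m->hash.fdir.hash;	/* flow director hash instead */
	return 0;				/* no hash reported */
}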
1415 static inline uint64_t
1416 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1417 {
1418         uint64_t pkt_flags;
1419
1420         /*
1421          * Check only whether a VLAN is present.
1422          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1423          * that can be found in the rte_eth_rxmode.offloads flags.
1424          */
1425         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1426
1427 #ifdef RTE_LIBRTE_IEEE1588
1428         if (rx_status & IXGBE_RXD_STAT_TMST)
1429                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1430 #endif
1431         return pkt_flags;
1432 }
1433
1434 static inline uint64_t
1435 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1436 {
1437         uint64_t pkt_flags;
1438
1439         /*
1440          * Bit 31: IPE, IPv4 checksum error
1441          * Bit 30: L4I, L4 integrity error
1442          */
1443         static uint64_t error_to_pkt_flags_map[4] = {
1444                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1445                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1446                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1447                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1448         };
1449         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1450                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1451
1452         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1453             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1454                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1455         }
1456
1457 #ifdef RTE_LIBRTE_SECURITY
1458         if (rx_status & IXGBE_RXD_STAT_SECP) {
1459                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1460                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1461                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1462         }
1463 #endif
1464
1465         return pkt_flags;
1466 }
1467
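/*
 * Illustrative sketch (not part of the driver): how an application might
 * act on the checksum flags set above. The helper name is hypothetical;
 * the PKT_RX_*_CKSUM_* flags are real DPDK definitions.
 */
static inline int
example_l3_l4_cksum_ok(const struct rte_mbuf *m)
{
	/* Either checksum reported bad -> drop or re-verify in software. */
	if (m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))
		return 0;
	return 1;
}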
1468 /*
1469  * LOOK_AHEAD defines how many desc statuses to check beyond the
1470  * current descriptor.
1471  * It must be a compile-time constant (a #define) for optimal performance.
1472  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1473  * function only works with LOOK_AHEAD=8.
1474  */
1475 #define LOOK_AHEAD 8
1476 #if (LOOK_AHEAD != 8)
1477 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1478 #endif
1479 static inline int
1480 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1481 {
1482         volatile union ixgbe_adv_rx_desc *rxdp;
1483         struct ixgbe_rx_entry *rxep;
1484         struct rte_mbuf *mb;
1485         uint16_t pkt_len;
1486         uint64_t pkt_flags;
1487         int nb_dd;
1488         uint32_t s[LOOK_AHEAD];
1489         uint32_t pkt_info[LOOK_AHEAD];
1490         int i, j, nb_rx = 0;
1491         uint32_t status;
1492         uint64_t vlan_flags = rxq->vlan_flags;
1493
1494         /* get references to current descriptor and S/W ring entry */
1495         rxdp = &rxq->rx_ring[rxq->rx_tail];
1496         rxep = &rxq->sw_ring[rxq->rx_tail];
1497
1498         status = rxdp->wb.upper.status_error;
1499         /* check to make sure there is at least 1 packet to receive */
1500         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1501                 return 0;
1502
1503         /*
1504          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1505          * reference packets that are ready to be received.
1506          */
1507         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1508              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1509                 /* Read desc statuses backwards to avoid race condition */
1510                 for (j = 0; j < LOOK_AHEAD; j++)
1511                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1512
1513                 rte_smp_rmb();
1514
1515                 /* Compute how many status bits were set */
1516                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1517                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1518                         ;
1519
1520                 for (j = 0; j < nb_dd; j++)
1521                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1522                                                        lo_dword.data);
1523
1524                 nb_rx += nb_dd;
1525
1526                 /* Translate descriptor info to mbuf format */
1527                 for (j = 0; j < nb_dd; ++j) {
1528                         mb = rxep[j].mbuf;
1529                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1530                                   rxq->crc_len;
1531                         mb->data_len = pkt_len;
1532                         mb->pkt_len = pkt_len;
1533                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1534
1535                         /* convert descriptor fields to rte mbuf flags */
1536                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1537                                 vlan_flags);
1538                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1539                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1540                                         ((uint16_t)pkt_info[j]);
1541                         mb->ol_flags = pkt_flags;
1542                         mb->packet_type =
1543                                 ixgbe_rxd_pkt_info_to_pkt_type
1544                                         (pkt_info[j], rxq->pkt_type_mask);
1545
1546                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1547                                 mb->hash.rss = rte_le_to_cpu_32(
1548                                     rxdp[j].wb.lower.hi_dword.rss);
1549                         else if (pkt_flags & PKT_RX_FDIR) {
1550                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1551                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1552                                     IXGBE_ATR_HASH_MASK;
1553                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1554                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1555                         }
1556                 }
1557
1558                 /* Move mbuf pointers from the S/W ring to the stage */
1559                 for (j = 0; j < LOOK_AHEAD; ++j) {
1560                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1561                 }
1562
1563                 /* stop if all requested packets could not be received */
1564                 if (nb_dd != LOOK_AHEAD)
1565                         break;
1566         }
1567
1568         /* clear software ring entries so we can cleanup correctly */
1569         for (i = 0; i < nb_rx; ++i) {
1570                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1571         }
1572
1573
1574         return nb_rx;
1575 }
1576
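/*
 * Illustrative sketch (not part of the driver): counting contiguous
 * descriptors whose DD bit is set, which is what the scan above does
 * LOOK_AHEAD entries at a time. The helper name and parameters are
 * hypothetical; IXGBE_RXDADV_STAT_DD is the real status bit.
 */
static inline int
example_count_done(const uint32_t *status, int n)
{
	int nb_dd = 0;

	while (nb_dd < n && (status[nb_dd] & IXGBE_RXDADV_STAT_DD))
		nb_dd++;
	return nb_dd;
}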
1577 static inline int
1578 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1579 {
1580         volatile union ixgbe_adv_rx_desc *rxdp;
1581         struct ixgbe_rx_entry *rxep;
1582         struct rte_mbuf *mb;
1583         uint16_t alloc_idx;
1584         __le64 dma_addr;
1585         int diag, i;
1586
1587         /* allocate buffers in bulk directly into the S/W ring */
1588         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1589         rxep = &rxq->sw_ring[alloc_idx];
1590         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1591                                     rxq->rx_free_thresh);
1592         if (unlikely(diag != 0))
1593                 return -ENOMEM;
1594
1595         rxdp = &rxq->rx_ring[alloc_idx];
1596         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1597                 /* populate the static rte mbuf fields */
1598                 mb = rxep[i].mbuf;
1599                 if (reset_mbuf) {
1600                         mb->port = rxq->port_id;
1601                 }
1602
1603                 rte_mbuf_refcnt_set(mb, 1);
1604                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1605
1606                 /* populate the descriptors */
1607                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1608                 rxdp[i].read.hdr_addr = 0;
1609                 rxdp[i].read.pkt_addr = dma_addr;
1610         }
1611
1612         /* update state of internal queue structure */
1613         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1614         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1615                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1616
1617         /* no errors */
1618         return 0;
1619 }
1620
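/*
 * Illustrative sketch (not part of the driver): the all-or-nothing bulk
 * get used by the refill above. A non-zero return from
 * rte_mempool_get_bulk() means no mbufs were dequeued, so the caller can
 * simply retry later. The helper name is hypothetical.
 */
static inline int
example_bulk_refill(struct rte_mempool *pool, struct rte_mbuf **bufs,
		    unsigned int n)
{
	if (rte_mempool_get_bulk(pool, (void **)bufs, n) != 0)
		return -ENOMEM;	/* nothing allocated, old buffers stay */
	return 0;
}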
1621 static inline uint16_t
1622 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1623                          uint16_t nb_pkts)
1624 {
1625         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1626         int i;
1627
1628         /* how many packets are ready to return? */
1629         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1630
1631         /* copy mbuf pointers to the application's packet list */
1632         for (i = 0; i < nb_pkts; ++i)
1633                 rx_pkts[i] = stage[i];
1634
1635         /* update internal queue state */
1636         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1637         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1638
1639         return nb_pkts;
1640 }
1641
1642 static inline uint16_t
1643 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1644              uint16_t nb_pkts)
1645 {
1646         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1647         uint16_t nb_rx = 0;
1648
1649         /* Any previously recv'd pkts will be returned from the Rx stage */
1650         if (rxq->rx_nb_avail)
1651                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1652
1653         /* Scan the H/W ring for packets to receive */
1654         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1655
1656         /* update internal queue state */
1657         rxq->rx_next_avail = 0;
1658         rxq->rx_nb_avail = nb_rx;
1659         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1660
1661         /* if required, allocate new buffers to replenish descriptors */
1662         if (rxq->rx_tail > rxq->rx_free_trigger) {
1663                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1664
1665                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1666                         int i, j;
1667
1668                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1669                                    "queue_id=%u", (unsigned) rxq->port_id,
1670                                    (unsigned) rxq->queue_id);
1671
1672                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1673                                 rxq->rx_free_thresh;
1674
1675                         /*
1676                          * Need to rewind any previous receives if we cannot
1677                          * allocate new buffers to replenish the old ones.
1678                          */
1679                         rxq->rx_nb_avail = 0;
1680                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1681                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1682                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1683
1684                         return 0;
1685                 }
1686
1687                 /* update tail pointer */
1688                 rte_wmb();
1689                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1690                                             cur_free_trigger);
1691         }
1692
1693         if (rxq->rx_tail >= rxq->nb_rx_desc)
1694                 rxq->rx_tail = 0;
1695
1696         /* received any packets this loop? */
1697         if (rxq->rx_nb_avail)
1698                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1699
1700         return 0;
1701 }
1702
1703 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1704 uint16_t
1705 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1706                            uint16_t nb_pkts)
1707 {
1708         uint16_t nb_rx;
1709
1710         if (unlikely(nb_pkts == 0))
1711                 return 0;
1712
1713         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1714                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1715
1716         /* request is relatively large, chunk it up */
1717         nb_rx = 0;
1718         while (nb_pkts) {
1719                 uint16_t ret, n;
1720
1721                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1722                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1723                 nb_rx = (uint16_t)(nb_rx + ret);
1724                 nb_pkts = (uint16_t)(nb_pkts - ret);
1725                 if (ret < n)
1726                         break;
1727         }
1728
1729         return nb_rx;
1730 }
1731
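/*
 * Illustrative sketch (not part of the driver): a typical poll loop that
 * ends up in the bulk-alloc receive path above when the queue was set up
 * with bulk allocation enabled. The helper name, burst size and the
 * immediate free stand in for real application processing;
 * rte_eth_rx_burst() and rte_pktmbuf_free() are real DPDK APIs.
 */
static inline void
example_poll_queue(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
	for (i = 0; i < nb; i++)
		rte_pktmbuf_free(pkts[i]);
}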
1732 uint16_t
1733 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1734                 uint16_t nb_pkts)
1735 {
1736         struct ixgbe_rx_queue *rxq;
1737         volatile union ixgbe_adv_rx_desc *rx_ring;
1738         volatile union ixgbe_adv_rx_desc *rxdp;
1739         struct ixgbe_rx_entry *sw_ring;
1740         struct ixgbe_rx_entry *rxe;
1741         struct rte_mbuf *rxm;
1742         struct rte_mbuf *nmb;
1743         union ixgbe_adv_rx_desc rxd;
1744         uint64_t dma_addr;
1745         uint32_t staterr;
1746         uint32_t pkt_info;
1747         uint16_t pkt_len;
1748         uint16_t rx_id;
1749         uint16_t nb_rx;
1750         uint16_t nb_hold;
1751         uint64_t pkt_flags;
1752         uint64_t vlan_flags;
1753
1754         nb_rx = 0;
1755         nb_hold = 0;
1756         rxq = rx_queue;
1757         rx_id = rxq->rx_tail;
1758         rx_ring = rxq->rx_ring;
1759         sw_ring = rxq->sw_ring;
1760         vlan_flags = rxq->vlan_flags;
1761         while (nb_rx < nb_pkts) {
1762                 /*
1763                  * The order of operations here is important as the DD status
1764                  * bit must not be read after any other descriptor fields.
1765                  * rx_ring and rxdp are pointing to volatile data so the order
1766                  * of accesses cannot be reordered by the compiler. If they were
1767                  * not volatile, they could be reordered which could lead to
1768                  * using invalid descriptor fields when read from rxd.
1769                  */
1770                 rxdp = &rx_ring[rx_id];
1771                 staterr = rxdp->wb.upper.status_error;
1772                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1773                         break;
1774                 rxd = *rxdp;
1775
1776                 /*
1777                  * End of packet.
1778                  *
1779                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1780                  * is likely to be invalid and to be dropped by the various
1781                  * validation checks performed by the network stack.
1782                  *
1783                  * Allocate a new mbuf to replenish the RX ring descriptor.
1784                  * If the allocation fails:
1785                  *    - arrange for that RX descriptor to be the first one
1786                  *      being parsed the next time the receive function is
1787                  *      invoked [on the same queue].
1788                  *
1789                  *    - Stop parsing the RX ring and return immediately.
1790                  *
1791                  * This policy does not drop the packet received in the RX
1792                  * descriptor for which the allocation of a new mbuf failed.
1793                  * Thus, it allows that packet to be retrieved later, once
1794                  * mbufs have been freed in the meantime.
1795                  * As a side effect, holding RX descriptors instead of
1796                  * systematically giving them back to the NIC may lead to
1797                  * RX ring exhaustion situations.
1798                  * However, the NIC can gracefully prevent such situations
1799                  * from happening by sending specific "back-pressure" flow
1800                  * control frames to its peer(s).
1801                  */
1802                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1803                            "ext_err_stat=0x%08x pkt_len=%u",
1804                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1805                            (unsigned) rx_id, (unsigned) staterr,
1806                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1807
1808                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1809                 if (nmb == NULL) {
1810                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1811                                    "queue_id=%u", (unsigned) rxq->port_id,
1812                                    (unsigned) rxq->queue_id);
1813                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1814                         break;
1815                 }
1816
1817                 nb_hold++;
1818                 rxe = &sw_ring[rx_id];
1819                 rx_id++;
1820                 if (rx_id == rxq->nb_rx_desc)
1821                         rx_id = 0;
1822
1823                 /* Prefetch next mbuf while processing current one. */
1824                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1825
1826                 /*
1827                  * When next RX descriptor is on a cache-line boundary,
1828                  * prefetch the next 4 RX descriptors and the next 8 pointers
1829                  * to mbufs.
1830                  */
1831                 if ((rx_id & 0x3) == 0) {
1832                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1833                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1834                 }
1835
1836                 rxm = rxe->mbuf;
1837                 rxe->mbuf = nmb;
1838                 dma_addr =
1839                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1840                 rxdp->read.hdr_addr = 0;
1841                 rxdp->read.pkt_addr = dma_addr;
1842
1843                 /*
1844                  * Initialize the returned mbuf.
1845                  * 1) setup generic mbuf fields:
1846                  *    - number of segments,
1847                  *    - next segment,
1848                  *    - packet length,
1849                  *    - RX port identifier.
1850                  * 2) integrate hardware offload data, if any:
1851                  *    - RSS flag & hash,
1852                  *    - IP checksum flag,
1853                  *    - VLAN TCI, if any,
1854                  *    - error flags.
1855                  */
1856                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1857                                       rxq->crc_len);
1858                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1859                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1860                 rxm->nb_segs = 1;
1861                 rxm->next = NULL;
1862                 rxm->pkt_len = pkt_len;
1863                 rxm->data_len = pkt_len;
1864                 rxm->port = rxq->port_id;
1865
1866                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1867                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1868                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1869
1870                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1871                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1872                 pkt_flags = pkt_flags |
1873                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1874                 rxm->ol_flags = pkt_flags;
1875                 rxm->packet_type =
1876                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1877                                                        rxq->pkt_type_mask);
1878
1879                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1880                         rxm->hash.rss = rte_le_to_cpu_32(
1881                                                 rxd.wb.lower.hi_dword.rss);
1882                 else if (pkt_flags & PKT_RX_FDIR) {
1883                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1884                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1885                                         IXGBE_ATR_HASH_MASK;
1886                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1887                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1888                 }
1889                 /*
1890                  * Store the mbuf address into the next entry of the array
1891                  * of returned packets.
1892                  */
1893                 rx_pkts[nb_rx++] = rxm;
1894         }
1895         rxq->rx_tail = rx_id;
1896
1897         /*
1898          * If the number of free RX descriptors is greater than the RX free
1899          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1900          * register.
1901          * Update the RDT with the value of the last processed RX descriptor
1902          * minus 1, to guarantee that the RDT register is never equal to the
1903          * RDH register, which creates a "full" ring situation from the
1904          * hardware point of view...
1905          */
1906         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1907         if (nb_hold > rxq->rx_free_thresh) {
1908                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1909                            "nb_hold=%u nb_rx=%u",
1910                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1911                            (unsigned) rx_id, (unsigned) nb_hold,
1912                            (unsigned) nb_rx);
1913                 rx_id = (uint16_t) ((rx_id == 0) ?
1914                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1915                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1916                 nb_hold = 0;
1917         }
1918         rxq->nb_rx_hold = nb_hold;
1919         return nb_rx;
1920 }
1921
1922 /**
1923  * Return the RSC count from an Rx descriptor (non-zero for RSC descriptors).
1924  */
1925 static inline uint32_t
1926 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1927 {
1928         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1929                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1930 }
1931
1932 /**
1933  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1934  *
1935  * Fill the following info in the HEAD buffer of the Rx cluster:
1936  *    - RX port identifier
1937  *    - hardware offload data, if any:
1938  *      - RSS flag & hash
1939  *      - IP checksum flag
1940  *      - VLAN TCI, if any
1941  *      - error flags
1942  * @head HEAD of the packet cluster
1943  * @desc HW descriptor to get data from
1944  * @rxq Pointer to the Rx queue
1945  */
1946 static inline void
1947 ixgbe_fill_cluster_head_buf(
1948         struct rte_mbuf *head,
1949         union ixgbe_adv_rx_desc *desc,
1950         struct ixgbe_rx_queue *rxq,
1951         uint32_t staterr)
1952 {
1953         uint32_t pkt_info;
1954         uint64_t pkt_flags;
1955
1956         head->port = rxq->port_id;
1957
1958         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1959          * set in the pkt_flags field.
1960          */
1961         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1962         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1963         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1964         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1965         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1966         head->ol_flags = pkt_flags;
1967         head->packet_type =
1968                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1969
1970         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1971                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1972         else if (pkt_flags & PKT_RX_FDIR) {
1973                 head->hash.fdir.hash =
1974                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1975                                                           & IXGBE_ATR_HASH_MASK;
1976                 head->hash.fdir.id =
1977                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1978         }
1979 }
1980
1981 /**
1982  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1983  *
1984  * @rx_queue Rx queue handle
1985  * @rx_pkts table of received packets
1986  * @nb_pkts size of rx_pkts table
1987  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1988  *
1989  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1990  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1991  *
1992  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1993  * 1) When non-EOP RSC completion arrives:
1994  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1995  *       segment's data length.
1996  *    b) Set the "next" pointer of the current segment to point to the segment
1997  *       at the NEXTP index.
1998  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1999  *       in the sw_rsc_ring.
2000  * 2) When EOP arrives we just update the cluster's total length and offload
2001  *    flags and deliver the cluster up to the upper layers. In our case - put it
2002  *    in the rx_pkts table.
2003  *
2004  * Returns the number of received packets/clusters (according to the "bulk
2005  * receive" interface).
2006  */
2007 static inline uint16_t
2008 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2009                     bool bulk_alloc)
2010 {
2011         struct ixgbe_rx_queue *rxq = rx_queue;
2012         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2013         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2014         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2015         uint16_t rx_id = rxq->rx_tail;
2016         uint16_t nb_rx = 0;
2017         uint16_t nb_hold = rxq->nb_rx_hold;
2018         uint16_t prev_id = rxq->rx_tail;
2019
2020         while (nb_rx < nb_pkts) {
2021                 bool eop;
2022                 struct ixgbe_rx_entry *rxe;
2023                 struct ixgbe_scattered_rx_entry *sc_entry;
2024                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2025                 struct ixgbe_rx_entry *next_rxe = NULL;
2026                 struct rte_mbuf *first_seg;
2027                 struct rte_mbuf *rxm;
2028                 struct rte_mbuf *nmb;
2029                 union ixgbe_adv_rx_desc rxd;
2030                 uint16_t data_len;
2031                 uint16_t next_id;
2032                 volatile union ixgbe_adv_rx_desc *rxdp;
2033                 uint32_t staterr;
2034
2035 next_desc:
2036                 /*
2037                  * The code in this whole file uses the volatile pointer to
2038                  * ensure the read ordering of the status and the rest of the
2039                  * descriptor fields (on the compiler level only!!!). This is so
2040                  * UGLY - why not just use the compiler barrier instead? DPDK
2041                  * even has the rte_compiler_barrier() for that.
2042                  *
2043                  * But most importantly this is just wrong because this doesn't
2044                  * ensure memory ordering in a general case at all. For
2045                  * instance, DPDK is supposed to work on Power CPUs where
2046                  * compiler barrier may just not be enough!
2047                  *
2048                  * I tried to write only this function properly to have a
2049                  * starting point (as a part of an LRO/RSC series) but the
2050                  * compiler cursed at me when I tried to cast away the
2051                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2052                  * keeping it the way it is for now.
2053                  *
2054                  * The code in this file is broken in so many other places and
2055                  * will just not work on a big endian CPU anyway, therefore the
2056                  * lines below will have to be revisited together with the rest
2057                  * of the ixgbe PMD.
2058                  *
2059                  * TODO:
2060                  *    - Get rid of "volatile" and let the compiler do its job.
2061                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2062                  *      memory ordering below.
2063                  */
2064                 rxdp = &rx_ring[rx_id];
2065                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2066
2067                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2068                         break;
2069
2070                 rxd = *rxdp;
2071
2072                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2073                                   "staterr=0x%x data_len=%u",
2074                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2075                            rte_le_to_cpu_16(rxd.wb.upper.length));
2076
2077                 if (!bulk_alloc) {
2078                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2079                         if (nmb == NULL) {
2080                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2081                                                   "port_id=%u queue_id=%u",
2082                                            rxq->port_id, rxq->queue_id);
2083
2084                                 rte_eth_devices[rxq->port_id].data->
2085                                                         rx_mbuf_alloc_failed++;
2086                                 break;
2087                         }
2088                 } else if (nb_hold > rxq->rx_free_thresh) {
2089                         uint16_t next_rdt = rxq->rx_free_trigger;
2090
2091                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2092                                 rte_wmb();
2093                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2094                                                             next_rdt);
2095                                 nb_hold -= rxq->rx_free_thresh;
2096                         } else {
2097                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2098                                                   "port_id=%u queue_id=%u",
2099                                            rxq->port_id, rxq->queue_id);
2100
2101                                 rte_eth_devices[rxq->port_id].data->
2102                                                         rx_mbuf_alloc_failed++;
2103                                 break;
2104                         }
2105                 }
2106
2107                 nb_hold++;
2108                 rxe = &sw_ring[rx_id];
2109                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2110
2111                 next_id = rx_id + 1;
2112                 if (next_id == rxq->nb_rx_desc)
2113                         next_id = 0;
2114
2115                 /* Prefetch next mbuf while processing current one. */
2116                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2117
2118                 /*
2119                  * When next RX descriptor is on a cache-line boundary,
2120                  * prefetch the next 4 RX descriptors and the next 8 pointers
2121                  * to mbufs.
2122                  */
2123                 if ((next_id & 0x3) == 0) {
2124                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2125                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2126                 }
2127
2128                 rxm = rxe->mbuf;
2129
2130                 if (!bulk_alloc) {
2131                         __le64 dma =
2132                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2133                         /*
2134                          * Update RX descriptor with the physical address of the
2135                          * new data buffer of the new allocated mbuf.
2136                          */
2137                         rxe->mbuf = nmb;
2138
2139                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2140                         rxdp->read.hdr_addr = 0;
2141                         rxdp->read.pkt_addr = dma;
2142                 } else
2143                         rxe->mbuf = NULL;
2144
2145                 /*
2146                  * Set data length & data buffer address of mbuf.
2147                  */
2148                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2149                 rxm->data_len = data_len;
2150
2151                 if (!eop) {
2152                         uint16_t nextp_id;
2153                         /*
2154                          * Get next descriptor index:
2155                          *  - For RSC it's in the NEXTP field.
2156                          *  - For a scattered packet - it's just a following
2157                          *    descriptor.
2158                          */
2159                         if (ixgbe_rsc_count(&rxd))
2160                                 nextp_id =
2161                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2162                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2163                         else
2164                                 nextp_id = next_id;
2165
2166                         next_sc_entry = &sw_sc_ring[nextp_id];
2167                         next_rxe = &sw_ring[nextp_id];
2168                         rte_ixgbe_prefetch(next_rxe);
2169                 }
2170
2171                 sc_entry = &sw_sc_ring[rx_id];
2172                 first_seg = sc_entry->fbuf;
2173                 sc_entry->fbuf = NULL;
2174
2175                 /*
2176                  * If this is the first buffer of the received packet,
2177                  * set the pointer to the first mbuf of the packet and
2178                  * initialize its context.
2179                  * Otherwise, update the total length and the number of segments
2180                  * of the current scattered packet, and update the pointer to
2181                  * the last mbuf of the current packet.
2182                  */
2183                 if (first_seg == NULL) {
2184                         first_seg = rxm;
2185                         first_seg->pkt_len = data_len;
2186                         first_seg->nb_segs = 1;
2187                 } else {
2188                         first_seg->pkt_len += data_len;
2189                         first_seg->nb_segs++;
2190                 }
2191
2192                 prev_id = rx_id;
2193                 rx_id = next_id;
2194
2195                 /*
2196                  * If this is not the last buffer of the received packet, update
2197                  * the pointer to the first mbuf at the NEXTP entry in the
2198                  * sw_sc_ring and continue to parse the RX ring.
2199                  */
2200                 if (!eop && next_rxe) {
2201                         rxm->next = next_rxe->mbuf;
2202                         next_sc_entry->fbuf = first_seg;
2203                         goto next_desc;
2204                 }
2205
2206                 /* Initialize the first mbuf of the returned packet */
2207                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2208
2209                 /*
2210                  * Deal with the case when HW CRC strip is disabled.
2211                  * That can't happen when LRO is enabled, but still could
2212                  * happen for scattered RX mode.
2213                  */
2214                 first_seg->pkt_len -= rxq->crc_len;
2215                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2216                         struct rte_mbuf *lp;
2217
2218                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2219                                 ;
2220
2221                         first_seg->nb_segs--;
2222                         lp->data_len -= rxq->crc_len - rxm->data_len;
2223                         lp->next = NULL;
2224                         rte_pktmbuf_free_seg(rxm);
2225                 } else
2226                         rxm->data_len -= rxq->crc_len;
2227
2228                 /* Prefetch data of first segment, if configured to do so. */
2229                 rte_packet_prefetch((char *)first_seg->buf_addr +
2230                         first_seg->data_off);
2231
2232                 /*
2233                  * Store the mbuf address into the next entry of the array
2234                  * of returned packets.
2235                  */
2236                 rx_pkts[nb_rx++] = first_seg;
2237         }
2238
2239         /*
2240          * Record index of the next RX descriptor to probe.
2241          */
2242         rxq->rx_tail = rx_id;
2243
2244         /*
2245          * If the number of free RX descriptors is greater than the RX free
2246          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2247          * register.
2248          * Update the RDT with the value of the last processed RX descriptor
2249          * minus 1, to guarantee that the RDT register is never equal to the
2250          * RDH register, which creates a "full" ring situation from the
2251          * hardware point of view...
2252          */
2253         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2254                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2255                            "nb_hold=%u nb_rx=%u",
2256                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2257
2258                 rte_wmb();
2259                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2260                 nb_hold = 0;
2261         }
2262
2263         rxq->nb_rx_hold = nb_hold;
2264         return nb_rx;
2265 }
2266
2267 uint16_t
2268 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2269                                  uint16_t nb_pkts)
2270 {
2271         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2272 }
2273
2274 uint16_t
2275 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2276                                uint16_t nb_pkts)
2277 {
2278         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2279 }
2280
2281 /*********************************************************************
2282  *
2283  *  Queue management functions
2284  *
2285  **********************************************************************/
2286
2287 static void __attribute__((cold))
2288 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2289 {
2290         unsigned i;
2291
2292         if (txq->sw_ring != NULL) {
2293                 for (i = 0; i < txq->nb_tx_desc; i++) {
2294                         if (txq->sw_ring[i].mbuf != NULL) {
2295                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2296                                 txq->sw_ring[i].mbuf = NULL;
2297                         }
2298                 }
2299         }
2300 }
2301
2302 static void __attribute__((cold))
2303 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2304 {
2305         if (txq != NULL &&
2306             txq->sw_ring != NULL)
2307                 rte_free(txq->sw_ring);
2308 }
2309
2310 static void __attribute__((cold))
2311 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2312 {
2313         if (txq != NULL && txq->ops != NULL) {
2314                 txq->ops->release_mbufs(txq);
2315                 txq->ops->free_swring(txq);
2316                 rte_free(txq);
2317         }
2318 }
2319
2320 void __attribute__((cold))
2321 ixgbe_dev_tx_queue_release(void *txq)
2322 {
2323         ixgbe_tx_queue_release(txq);
2324 }
2325
2326 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2327 static void __attribute__((cold))
2328 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2329 {
2330         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2331         struct ixgbe_tx_entry *txe = txq->sw_ring;
2332         uint16_t prev, i;
2333
2334         /* Zero out HW ring memory */
2335         for (i = 0; i < txq->nb_tx_desc; i++) {
2336                 txq->tx_ring[i] = zeroed_desc;
2337         }
2338
2339         /* Initialize SW ring entries */
2340         prev = (uint16_t) (txq->nb_tx_desc - 1);
2341         for (i = 0; i < txq->nb_tx_desc; i++) {
2342                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2343
2344                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2345                 txe[i].mbuf = NULL;
2346                 txe[i].last_id = i;
2347                 txe[prev].next_id = i;
2348                 prev = i;
2349         }
2350
2351         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2352         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2353
2354         txq->tx_tail = 0;
2355         txq->nb_tx_used = 0;
2356         /*
2357          * Always allow 1 descriptor to be un-allocated to avoid
2358          * a H/W race condition
2359          */
2360         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2361         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2362         txq->ctx_curr = 0;
2363         memset((void *)&txq->ctx_cache, 0,
2364                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2365 }
2366
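/*
 * Illustrative sketch (not part of the driver): the reset above links the
 * S/W ring entries into a circle via next_id (txe[i].next_id == (i + 1)
 * modulo the ring size). The helper name is hypothetical and only checks
 * that invariant.
 */
static inline int
example_swring_is_circular(const struct ixgbe_tx_entry *txe, uint16_t n)
{
	uint16_t i, id = 0;

	/* Following next_id n times from slot 0 must return to slot 0. */
	for (i = 0; i < n; i++)
		id = txe[id].next_id;
	return id == 0;
}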
2367 static const struct ixgbe_txq_ops def_txq_ops = {
2368         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2369         .free_swring = ixgbe_tx_free_swring,
2370         .reset = ixgbe_reset_tx_queue,
2371 };
2372
2373 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2374  * the queue parameters. Used in tx_queue_setup by primary process and then
2375  * in dev_init by secondary process when attaching to an existing ethdev.
2376  */
2377 void __attribute__((cold))
2378 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2379 {
2380         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2381         if ((txq->offloads == 0) &&
2382 #ifdef RTE_LIBRTE_SECURITY
2383                         !(txq->using_ipsec) &&
2384 #endif
2385                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2386                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2387                 dev->tx_pkt_prepare = NULL;
2388 #ifdef RTE_IXGBE_INC_VECTOR
2389                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2390                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2391                                         ixgbe_txq_vec_setup(txq) == 0)) {
2392                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2393                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2394                 } else
2395 #endif
2396                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2397         } else {
2398                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2399                 PMD_INIT_LOG(DEBUG,
2400                                 " - offloads = 0x%" PRIx64,
2401                                 txq->offloads);
2402                 PMD_INIT_LOG(DEBUG,
2403                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2404                                 (unsigned long)txq->tx_rs_thresh,
2405                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2406                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2407                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2408         }
2409 }
2410
2411 uint64_t
2412 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2413 {
2414         RTE_SET_USED(dev);
2415
2416         return 0;
2417 }
2418
2419 uint64_t
2420 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2421 {
2422         uint64_t tx_offload_capa;
2423         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2424
2425         tx_offload_capa =
2426                 DEV_TX_OFFLOAD_VLAN_INSERT |
2427                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2428                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2429                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2430                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2431                 DEV_TX_OFFLOAD_TCP_TSO     |
2432                 DEV_TX_OFFLOAD_MULTI_SEGS;
2433
2434         if (hw->mac.type == ixgbe_mac_82599EB ||
2435             hw->mac.type == ixgbe_mac_X540)
2436                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2437
2438         if (hw->mac.type == ixgbe_mac_X550 ||
2439             hw->mac.type == ixgbe_mac_X550EM_x ||
2440             hw->mac.type == ixgbe_mac_X550EM_a)
2441                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2442
2443 #ifdef RTE_LIBRTE_SECURITY
2444         if (dev->security_ctx)
2445                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2446 #endif
2447         return tx_offload_capa;
2448 }
2449
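/*
 * Illustrative sketch (not part of the driver): how an application can
 * check the per-port Tx offload capabilities reported above before
 * enabling them. The helper name is hypothetical; rte_eth_dev_info_get()
 * and the DEV_TX_OFFLOAD_* bits are real DPDK definitions of this era.
 */
static inline int
example_port_supports_tso(uint16_t port_id)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	return (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) != 0;
}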
2450 int __attribute__((cold))
2451 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2452                          uint16_t queue_idx,
2453                          uint16_t nb_desc,
2454                          unsigned int socket_id,
2455                          const struct rte_eth_txconf *tx_conf)
2456 {
2457         const struct rte_memzone *tz;
2458         struct ixgbe_tx_queue *txq;
2459         struct ixgbe_hw     *hw;
2460         uint16_t tx_rs_thresh, tx_free_thresh;
2461         uint64_t offloads;
2462
2463         PMD_INIT_FUNC_TRACE();
2464         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2465
2466         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2467
2468         /*
2469          * Validate number of transmit descriptors.
2470          * It must not exceed hardware maximum, and must be multiple
2471          * of IXGBE_ALIGN.
2472          */
2473         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2474                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2475                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2476                 return -EINVAL;
2477         }
2478
2479         /*
2480          * The following two parameters control the setting of the RS bit on
2481          * transmit descriptors.
2482          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2483          * descriptors have been used.
2484          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2485          * descriptors are used or if the number of descriptors required
2486          * to transmit a packet is greater than the number of free TX
2487          * descriptors.
2488          * The following constraints must be satisfied:
2489          *  tx_rs_thresh must be greater than 0.
2490          *  tx_rs_thresh must be less than the size of the ring minus 2.
2491          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2492          *  tx_rs_thresh must be a divisor of the ring size.
2493          *  tx_free_thresh must be greater than 0.
2494          *  tx_free_thresh must be less than the size of the ring minus 3.
2495          * One descriptor in the TX ring is used as a sentinel to avoid a
2496          * H/W race condition, hence the maximum threshold constraints.
2497          * When set to zero use default values.
2498          */
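        /*
         * Worked example (illustrative values only): with nb_desc = 1024,
         * tx_rs_thresh = 32 and tx_free_thresh = 64 all of the constraints
         * above hold: 32 > 0, 32 < 1022, 32 <= 64, 1024 % 32 == 0,
         * 64 > 0 and 64 < 1021.
         */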
2499         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2500                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2501         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2502                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2503         if (tx_rs_thresh >= (nb_desc - 2)) {
2504                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2505                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2506                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2507                         (int)dev->data->port_id, (int)queue_idx);
2508                 return -(EINVAL);
2509         }
2510         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2511                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2512                         "(tx_rs_thresh=%u port=%d queue=%d)",
2513                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2514                         (int)dev->data->port_id, (int)queue_idx);
2515                 return -(EINVAL);
2516         }
2517         if (tx_free_thresh >= (nb_desc - 3)) {
2518                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2519                              "number of TX descriptors minus 3. "
2520                              "(tx_free_thresh=%u "
2521                              "port=%d queue=%d)",
2522                              (unsigned int)tx_free_thresh,
2523                              (int)dev->data->port_id, (int)queue_idx);
2524                 return -(EINVAL);
2525         }
2526         if (tx_rs_thresh > tx_free_thresh) {
2527                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2528                              "tx_free_thresh. (tx_free_thresh=%u "
2529                              "tx_rs_thresh=%u port=%d queue=%d)",
2530                              (unsigned int)tx_free_thresh,
2531                              (unsigned int)tx_rs_thresh,
2532                              (int)dev->data->port_id,
2533                              (int)queue_idx);
2534                 return -(EINVAL);
2535         }
2536         if ((nb_desc % tx_rs_thresh) != 0) {
2537                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2538                              "number of TX descriptors. (tx_rs_thresh=%u "
2539                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2540                              (int)dev->data->port_id, (int)queue_idx);
2541                 return -(EINVAL);
2542         }
2543
2544         /*
2545          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2546          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2547          * by the NIC and all descriptors are written back after the NIC
2548          * accumulates WTHRESH descriptors.
2549          */
2550         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2551                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2552                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2553                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2554                              (int)dev->data->port_id, (int)queue_idx);
2555                 return -(EINVAL);
2556         }
2557
2558         /* Free memory prior to re-allocation if needed... */
2559         if (dev->data->tx_queues[queue_idx] != NULL) {
2560                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2561                 dev->data->tx_queues[queue_idx] = NULL;
2562         }
2563
2564         /* First allocate the tx queue data structure */
2565         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2566                                  RTE_CACHE_LINE_SIZE, socket_id);
2567         if (txq == NULL)
2568                 return -ENOMEM;
2569
2570         /*
2571          * Allocate TX ring hardware descriptors. A memzone large enough to
2572          * handle the maximum ring size is allocated in order to allow for
2573          * resizing in later calls to the queue setup function.
2574          */
2575         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2576                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2577                         IXGBE_ALIGN, socket_id);
2578         if (tz == NULL) {
2579                 ixgbe_tx_queue_release(txq);
2580                 return -ENOMEM;
2581         }
2582
2583         txq->nb_tx_desc = nb_desc;
2584         txq->tx_rs_thresh = tx_rs_thresh;
2585         txq->tx_free_thresh = tx_free_thresh;
2586         txq->pthresh = tx_conf->tx_thresh.pthresh;
2587         txq->hthresh = tx_conf->tx_thresh.hthresh;
2588         txq->wthresh = tx_conf->tx_thresh.wthresh;
2589         txq->queue_id = queue_idx;
2590         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2591                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2592         txq->port_id = dev->data->port_id;
2593         txq->offloads = offloads;
2594         txq->ops = &def_txq_ops;
2595         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2596 #ifdef RTE_LIBRTE_SECURITY
2597         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2598                         DEV_TX_OFFLOAD_SECURITY);
2599 #endif
2600
2601         /*
2602          * Modification to set VFTDT for virtual function if vf is detected
2603          * Use VFTDT as the TX tail register when running as a virtual function
2604         if (hw->mac.type == ixgbe_mac_82599_vf ||
2605             hw->mac.type == ixgbe_mac_X540_vf ||
2606             hw->mac.type == ixgbe_mac_X550_vf ||
2607             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2608             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2609                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2610         else
2611                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2612
2613         txq->tx_ring_phys_addr = tz->iova;
2614         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2615
2616         /* Allocate software ring */
2617         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2618                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2619                                 RTE_CACHE_LINE_SIZE, socket_id);
2620         if (txq->sw_ring == NULL) {
2621                 ixgbe_tx_queue_release(txq);
2622                 return -ENOMEM;
2623         }
2624         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2625                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2626
2627         /* set up vector or scalar TX function as appropriate */
2628         ixgbe_set_tx_function(dev, txq);
2629
2630         txq->ops->reset(txq);
2631
2632         dev->data->tx_queues[queue_idx] = txq;
2633
2634
2635         return 0;
2636 }
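
/*
 * Illustrative usage sketch (not part of the driver): applications reach
 * ixgbe_dev_tx_queue_setup() through the generic ethdev API. The values
 * below are assumptions chosen only to satisfy the threshold constraints
 * documented above:
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 1024,
 *                                      rte_eth_dev_socket_id(port_id),
 *                                      &txconf);
 *
 * wthresh is left at 0 because tx_rs_thresh is greater than 1 (see the
 * check above).
 */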
2637
2638 /**
2639  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2640  *
2641  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2642  * in the sw_rsc_ring is not set to NULL but rather points to the next
2643  * mbuf of this RSC aggregation (that has not been completed yet and still
2644  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2645  * just free the first "nb_segs" segments of the cluster explicitly by
2646  * calling rte_pktmbuf_free_seg() on each of them.
2647  *
2648  * @m scattered cluster head
2649  */
2650 static void __attribute__((cold))
2651 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2652 {
2653         uint16_t i, nb_segs = m->nb_segs;
2654         struct rte_mbuf *next_seg;
2655
2656         for (i = 0; i < nb_segs; i++) {
2657                 next_seg = m->next;
2658                 rte_pktmbuf_free_seg(m);
2659                 m = next_seg;
2660         }
2661 }
2662
2663 static void __attribute__((cold))
2664 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2665 {
2666         unsigned i;
2667
2668 #ifdef RTE_IXGBE_INC_VECTOR
2669         /* SSE Vector driver has a different way of releasing mbufs. */
2670         if (rxq->rx_using_sse) {
2671                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2672                 return;
2673         }
2674 #endif
2675
2676         if (rxq->sw_ring != NULL) {
2677                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2678                         if (rxq->sw_ring[i].mbuf != NULL) {
2679                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2680                                 rxq->sw_ring[i].mbuf = NULL;
2681                         }
2682                 }
2683                 if (rxq->rx_nb_avail) {
2684                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2685                                 struct rte_mbuf *mb;
2686
2687                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2688                                 rte_pktmbuf_free_seg(mb);
2689                         }
2690                         rxq->rx_nb_avail = 0;
2691                 }
2692         }
2693
2694         if (rxq->sw_sc_ring)
2695                 for (i = 0; i < rxq->nb_rx_desc; i++)
2696                         if (rxq->sw_sc_ring[i].fbuf) {
2697                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2698                                 rxq->sw_sc_ring[i].fbuf = NULL;
2699                         }
2700 }
2701
2702 static void __attribute__((cold))
2703 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2704 {
2705         if (rxq != NULL) {
2706                 ixgbe_rx_queue_release_mbufs(rxq);
2707                 rte_free(rxq->sw_ring);
2708                 rte_free(rxq->sw_sc_ring);
2709                 rte_free(rxq);
2710         }
2711 }
2712
2713 void __attribute__((cold))
2714 ixgbe_dev_rx_queue_release(void *rxq)
2715 {
2716         ixgbe_rx_queue_release(rxq);
2717 }
2718
2719 /*
2720  * Check if Rx Burst Bulk Alloc function can be used.
2721  * Return
2722  *        0: the preconditions are satisfied and the bulk allocation function
2723  *           can be used.
2724  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2725  *           function must be used.
2726  */
2727 static inline int __attribute__((cold))
2728 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2729 {
2730         int ret = 0;
2731
2732         /*
2733          * Make sure the following pre-conditions are satisfied:
2734          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2735          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2736          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2737          * Scattered packets are not supported.  This should be checked
2738          * outside of this function.
2739          */
2740         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2741                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2742                              "rxq->rx_free_thresh=%d, "
2743                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2744                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2745                 ret = -EINVAL;
2746         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2747                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2748                              "rxq->rx_free_thresh=%d, "
2749                              "rxq->nb_rx_desc=%d",
2750                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2751                 ret = -EINVAL;
2752         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2753                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2754                              "rxq->nb_rx_desc=%d, "
2755                              "rxq->rx_free_thresh=%d",
2756                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2757                 ret = -EINVAL;
2758         }
2759
2760         return ret;
2761 }
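
/*
 * Worked example for the preconditions above (assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST is 32): nb_rx_desc = 512 with
 * rx_free_thresh = 64 passes all three checks (64 >= 32, 64 < 512,
 * 512 % 64 == 0), whereas rx_free_thresh = 48 would fail the divisor
 * check because 512 % 48 != 0.
 */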
2762
2763 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2764 static void __attribute__((cold))
2765 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2766 {
2767         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2768         unsigned i;
2769         uint16_t len = rxq->nb_rx_desc;
2770
2771         /*
2772          * By default, the Rx queue setup function allocates enough memory for
2773          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2774          * extra memory at the end of the descriptor ring to be zero'd out.
2775          */
2776         if (adapter->rx_bulk_alloc_allowed)
2777                 /* zero out extra memory */
2778                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2779
2780         /*
2781          * Zero out HW ring memory. Zero out extra memory at the end of
2782          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2783          * reads extra memory as zeros.
2784          */
2785         for (i = 0; i < len; i++) {
2786                 rxq->rx_ring[i] = zeroed_desc;
2787         }
2788
2789         /*
2790          * initialize extra software ring entries. Space for these extra
2791          * entries is always allocated
2792          */
2793         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2794         for (i = rxq->nb_rx_desc; i < len; ++i) {
2795                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2796         }
2797
2798         rxq->rx_nb_avail = 0;
2799         rxq->rx_next_avail = 0;
2800         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2801         rxq->rx_tail = 0;
2802         rxq->nb_rx_hold = 0;
2803         rxq->pkt_first_seg = NULL;
2804         rxq->pkt_last_seg = NULL;
2805
2806 #ifdef RTE_IXGBE_INC_VECTOR
2807         rxq->rxrearm_start = 0;
2808         rxq->rxrearm_nb = 0;
2809 #endif
2810 }
2811
2812 static int
2813 ixgbe_is_vf(struct rte_eth_dev *dev)
2814 {
2815         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2816
2817         switch (hw->mac.type) {
2818         case ixgbe_mac_82599_vf:
2819         case ixgbe_mac_X540_vf:
2820         case ixgbe_mac_X550_vf:
2821         case ixgbe_mac_X550EM_x_vf:
2822         case ixgbe_mac_X550EM_a_vf:
2823                 return 1;
2824         default:
2825                 return 0;
2826         }
2827 }
2828
2829 uint64_t
2830 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2831 {
2832         uint64_t offloads = 0;
2833         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2834
2835         if (hw->mac.type != ixgbe_mac_82598EB)
2836                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2837
2838         return offloads;
2839 }
2840
2841 uint64_t
2842 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2843 {
2844         uint64_t offloads;
2845         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2846
2847         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2848                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2849                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2850                    DEV_RX_OFFLOAD_KEEP_CRC    |
2851                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2852                    DEV_RX_OFFLOAD_SCATTER;
2853
2854         if (hw->mac.type == ixgbe_mac_82598EB)
2855                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2856
2857         if (ixgbe_is_vf(dev) == 0)
2858                 offloads |= (DEV_RX_OFFLOAD_VLAN_FILTER |
2859                              DEV_RX_OFFLOAD_VLAN_EXTEND);
2860
2861         /*
2862          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2863          * mode.
2864          */
2865         if ((hw->mac.type == ixgbe_mac_82599EB ||
2866              hw->mac.type == ixgbe_mac_X540) &&
2867             !RTE_ETH_DEV_SRIOV(dev).active)
2868                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2869
2870         if (hw->mac.type == ixgbe_mac_82599EB ||
2871             hw->mac.type == ixgbe_mac_X540)
2872                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
2873
2874         if (hw->mac.type == ixgbe_mac_X550 ||
2875             hw->mac.type == ixgbe_mac_X550EM_x ||
2876             hw->mac.type == ixgbe_mac_X550EM_a)
2877                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
2878
2879 #ifdef RTE_LIBRTE_SECURITY
2880         if (dev->security_ctx)
2881                 offloads |= DEV_RX_OFFLOAD_SECURITY;
2882 #endif
2883
2884         return offloads;
2885 }
2886
2887 int __attribute__((cold))
2888 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2889                          uint16_t queue_idx,
2890                          uint16_t nb_desc,
2891                          unsigned int socket_id,
2892                          const struct rte_eth_rxconf *rx_conf,
2893                          struct rte_mempool *mp)
2894 {
2895         const struct rte_memzone *rz;
2896         struct ixgbe_rx_queue *rxq;
2897         struct ixgbe_hw     *hw;
2898         uint16_t len;
2899         struct ixgbe_adapter *adapter =
2900                 (struct ixgbe_adapter *)dev->data->dev_private;
2901         uint64_t offloads;
2902
2903         PMD_INIT_FUNC_TRACE();
2904         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2905
2906         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
2907
2908         /*
2909          * Validate number of receive descriptors.
2910          * It must not exceed hardware maximum, and must be multiple
2911          * of IXGBE_ALIGN.
2912          */
2913         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2914                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2915                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2916                 return -EINVAL;
2917         }
2918
2919         /* Free memory prior to re-allocation if needed... */
2920         if (dev->data->rx_queues[queue_idx] != NULL) {
2921                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2922                 dev->data->rx_queues[queue_idx] = NULL;
2923         }
2924
2925         /* First allocate the rx queue data structure */
2926         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2927                                  RTE_CACHE_LINE_SIZE, socket_id);
2928         if (rxq == NULL)
2929                 return -ENOMEM;
2930         rxq->mb_pool = mp;
2931         rxq->nb_rx_desc = nb_desc;
2932         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2933         rxq->queue_id = queue_idx;
2934         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2935                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2936         rxq->port_id = dev->data->port_id;
2937         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2938                 rxq->crc_len = ETHER_CRC_LEN;
2939         else
2940                 rxq->crc_len = 0;
2941         rxq->drop_en = rx_conf->rx_drop_en;
2942         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2943         rxq->offloads = offloads;
2944
2945         /*
2946          * The packet type in RX descriptor is different for different NICs.
2947          * Some bits are used for x550 but reserved for other NICs.
2948          * So set different masks for different NICs.
2949          */
2950         if (hw->mac.type == ixgbe_mac_X550 ||
2951             hw->mac.type == ixgbe_mac_X550EM_x ||
2952             hw->mac.type == ixgbe_mac_X550EM_a ||
2953             hw->mac.type == ixgbe_mac_X550_vf ||
2954             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2955             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2956                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2957         else
2958                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2959
2960         /*
2961          * Allocate RX ring hardware descriptors. A memzone large enough to
2962          * handle the maximum ring size is allocated in order to allow for
2963          * resizing in later calls to the queue setup function.
2964          */
2965         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2966                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2967         if (rz == NULL) {
2968                 ixgbe_rx_queue_release(rxq);
2969                 return -ENOMEM;
2970         }
2971
2972         /*
2973          * Zero init all the descriptors in the ring.
2974          */
2975         memset(rz->addr, 0, RX_RING_SZ);
2976
2977         /*
2978          * Use VFRDT/VFRDH as the RX tail/head registers when running as a VF
2979          */
2980         if (hw->mac.type == ixgbe_mac_82599_vf ||
2981             hw->mac.type == ixgbe_mac_X540_vf ||
2982             hw->mac.type == ixgbe_mac_X550_vf ||
2983             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2984             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2985                 rxq->rdt_reg_addr =
2986                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2987                 rxq->rdh_reg_addr =
2988                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2989         } else {
2990                 rxq->rdt_reg_addr =
2991                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2992                 rxq->rdh_reg_addr =
2993                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2994         }
2995
2996         rxq->rx_ring_phys_addr = rz->iova;
2997         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2998
2999         /*
3000          * Certain constraints must be met in order to use the bulk buffer
3001          * allocation Rx burst function. If any Rx queue does not meet them,
3002          * the feature should be disabled for the whole port.
3003          */
3004         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3005                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3006                                     "preconditions - canceling the feature for "
3007                                     "the whole port[%d]",
3008                              rxq->queue_id, rxq->port_id);
3009                 adapter->rx_bulk_alloc_allowed = false;
3010         }
3011
3012         /*
3013          * Allocate software ring. Allow for space at the end of the
3014          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3015          * function does not access an invalid memory region.
3016          */
3017         len = nb_desc;
3018         if (adapter->rx_bulk_alloc_allowed)
3019                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3020
3021         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3022                                           sizeof(struct ixgbe_rx_entry) * len,
3023                                           RTE_CACHE_LINE_SIZE, socket_id);
3024         if (!rxq->sw_ring) {
3025                 ixgbe_rx_queue_release(rxq);
3026                 return -ENOMEM;
3027         }
3028
3029         /*
3030          * Always allocate even if it's not going to be needed in order to
3031          * simplify the code.
3032          *
3033          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3034          * be requested in ixgbe_dev_rx_init(), which is called later from
3035          * dev_start() flow.
3036          */
3037         rxq->sw_sc_ring =
3038                 rte_zmalloc_socket("rxq->sw_sc_ring",
3039                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3040                                    RTE_CACHE_LINE_SIZE, socket_id);
3041         if (!rxq->sw_sc_ring) {
3042                 ixgbe_rx_queue_release(rxq);
3043                 return -ENOMEM;
3044         }
3045
3046         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3047                             "dma_addr=0x%"PRIx64,
3048                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3049                      rxq->rx_ring_phys_addr);
3050
3051         if (!rte_is_power_of_2(nb_desc)) {
3052                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3053                                     "preconditions - canceling the feature for "
3054                                     "the whole port[%d]",
3055                              rxq->queue_id, rxq->port_id);
3056                 adapter->rx_vec_allowed = false;
3057         } else
3058                 ixgbe_rxq_vec_setup(rxq);
3059
3060         dev->data->rx_queues[queue_idx] = rxq;
3061
3062         ixgbe_reset_rx_queue(adapter, rxq);
3063
3064         return 0;
3065 }
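
/*
 * Illustrative usage sketch (not part of the driver): a typical
 * application call reaching ixgbe_dev_rx_queue_setup() above. The pool
 * name and sizes are assumptions for the example only:
 *
 *     struct rte_mempool *mb_pool = rte_pktmbuf_pool_create("rx_pool",
 *                     8192, 256, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
 *                     rte_socket_id());
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *                     rte_eth_dev_socket_id(port_id), NULL, mb_pool);
 *
 * Passing NULL for rx_conf makes the ethdev layer use the defaults the
 * driver reports in dev_info; 512 descriptors also keep the power-of-2
 * requirement for vector Rx checked above.
 */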
3066
3067 uint32_t
3068 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3069 {
3070 #define IXGBE_RXQ_SCAN_INTERVAL 4
3071         volatile union ixgbe_adv_rx_desc *rxdp;
3072         struct ixgbe_rx_queue *rxq;
3073         uint32_t desc = 0;
3074
3075         rxq = dev->data->rx_queues[rx_queue_id];
3076         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3077
3078         while ((desc < rxq->nb_rx_desc) &&
3079                 (rxdp->wb.upper.status_error &
3080                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3081                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3082                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3083                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3084                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3085                                 desc - rxq->nb_rx_desc]);
3086         }
3087
3088         return desc;
3089 }
3090
3091 int
3092 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3093 {
3094         volatile union ixgbe_adv_rx_desc *rxdp;
3095         struct ixgbe_rx_queue *rxq = rx_queue;
3096         uint32_t desc;
3097
3098         if (unlikely(offset >= rxq->nb_rx_desc))
3099                 return 0;
3100         desc = rxq->rx_tail + offset;
3101         if (desc >= rxq->nb_rx_desc)
3102                 desc -= rxq->nb_rx_desc;
3103
3104         rxdp = &rxq->rx_ring[desc];
3105         return !!(rxdp->wb.upper.status_error &
3106                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3107 }
3108
3109 int
3110 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3111 {
3112         struct ixgbe_rx_queue *rxq = rx_queue;
3113         volatile uint32_t *status;
3114         uint32_t nb_hold, desc;
3115
3116         if (unlikely(offset >= rxq->nb_rx_desc))
3117                 return -EINVAL;
3118
3119 #ifdef RTE_IXGBE_INC_VECTOR
3120         if (rxq->rx_using_sse)
3121                 nb_hold = rxq->rxrearm_nb;
3122         else
3123 #endif
3124                 nb_hold = rxq->nb_rx_hold;
3125         if (offset >= rxq->nb_rx_desc - nb_hold)
3126                 return RTE_ETH_RX_DESC_UNAVAIL;
3127
3128         desc = rxq->rx_tail + offset;
3129         if (desc >= rxq->nb_rx_desc)
3130                 desc -= rxq->nb_rx_desc;
3131
3132         status = &rxq->rx_ring[desc].wb.upper.status_error;
3133         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3134                 return RTE_ETH_RX_DESC_DONE;
3135
3136         return RTE_ETH_RX_DESC_AVAIL;
3137 }
3138
3139 int
3140 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3141 {
3142         struct ixgbe_tx_queue *txq = tx_queue;
3143         volatile uint32_t *status;
3144         uint32_t desc;
3145
3146         if (unlikely(offset >= txq->nb_tx_desc))
3147                 return -EINVAL;
3148
3149         desc = txq->tx_tail + offset;
3150         /* go to next desc that has the RS bit */
3151         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3152                 txq->tx_rs_thresh;
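        /*
         * e.g. with tx_tail + offset = 40 and tx_rs_thresh = 32 this
         * rounds desc up to 64 before its write-back status is checked.
         */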
3153         if (desc >= txq->nb_tx_desc) {
3154                 desc -= txq->nb_tx_desc;
3155                 if (desc >= txq->nb_tx_desc)
3156                         desc -= txq->nb_tx_desc;
3157         }
3158
3159         status = &txq->tx_ring[desc].wb.status;
3160         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3161                 return RTE_ETH_TX_DESC_DONE;
3162
3163         return RTE_ETH_TX_DESC_FULL;
3164 }
3165
3166 void __attribute__((cold))
3167 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3168 {
3169         unsigned i;
3170         struct ixgbe_adapter *adapter =
3171                 (struct ixgbe_adapter *)dev->data->dev_private;
3172
3173         PMD_INIT_FUNC_TRACE();
3174
3175         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3176                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3177
3178                 if (txq != NULL) {
3179                         txq->ops->release_mbufs(txq);
3180                         txq->ops->reset(txq);
3181                 }
3182         }
3183
3184         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3185                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3186
3187                 if (rxq != NULL) {
3188                         ixgbe_rx_queue_release_mbufs(rxq);
3189                         ixgbe_reset_rx_queue(adapter, rxq);
3190                 }
3191         }
3192 }
3193
3194 void
3195 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3196 {
3197         unsigned i;
3198
3199         PMD_INIT_FUNC_TRACE();
3200
3201         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3202                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3203                 dev->data->rx_queues[i] = NULL;
3204         }
3205         dev->data->nb_rx_queues = 0;
3206
3207         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3208                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3209                 dev->data->tx_queues[i] = NULL;
3210         }
3211         dev->data->nb_tx_queues = 0;
3212 }
3213
3214 /*********************************************************************
3215  *
3216  *  Device RX/TX init functions
3217  *
3218  **********************************************************************/
3219
3220 /**
3221  * Receive Side Scaling (RSS)
3222  * See section 7.1.2.8 in the following document:
3223  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3224  *
3225  * Principles:
3226  * The source and destination IP addresses of the IP header and the source
3227  * and destination ports of TCP/UDP headers, if any, of received packets are
3228  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3229  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3230  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3231  * RSS output index, which is used as the index of the RX queue in which
3232  * the received packets are stored.
3233  * The following output is supplied in the RX write-back descriptor:
3234  *     - 32-bit result of the Microsoft RSS hash function,
3235  *     - 4-bit RSS type field.
3236  */
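
/*
 * Worked example of the flow described above (hash value is illustrative):
 * if the RSS hash of a packet is 0x8F24B7C5, the RETA index is
 * 0x8F24B7C5 & 0x7F = 0x45 (69), and the packet is delivered to the Rx
 * queue stored in RETA entry 69. The full 32-bit hash and the RSS type
 * are then reported to software in the Rx write-back descriptor.
 */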
3237
3238 /*
3239  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3240  * Used as the default key.
3241  */
3242 static uint8_t rss_intel_key[40] = {
3243         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3244         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3245         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3246         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3247         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3248 };
3249
3250 static void
3251 ixgbe_rss_disable(struct rte_eth_dev *dev)
3252 {
3253         struct ixgbe_hw *hw;
3254         uint32_t mrqc;
3255         uint32_t mrqc_reg;
3256
3257         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3258         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3259         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3260         mrqc &= ~IXGBE_MRQC_RSSEN;
3261         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3262 }
3263
3264 static void
3265 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3266 {
3267         uint8_t  *hash_key;
3268         uint32_t mrqc;
3269         uint32_t rss_key;
3270         uint64_t rss_hf;
3271         uint16_t i;
3272         uint32_t mrqc_reg;
3273         uint32_t rssrk_reg;
3274
3275         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3276         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3277
3278         hash_key = rss_conf->rss_key;
3279         if (hash_key != NULL) {
3280                 /* Fill in RSS hash key */
3281                 for (i = 0; i < 10; i++) {
3282                         rss_key  = hash_key[(i * 4)];
3283                         rss_key |= hash_key[(i * 4) + 1] << 8;
3284                         rss_key |= hash_key[(i * 4) + 2] << 16;
3285                         rss_key |= hash_key[(i * 4) + 3] << 24;
3286                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3287                 }
3288         }
3289
3290         /* Set configured hashing protocols in MRQC register */
3291         rss_hf = rss_conf->rss_hf;
3292         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3293         if (rss_hf & ETH_RSS_IPV4)
3294                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3295         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3296                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3297         if (rss_hf & ETH_RSS_IPV6)
3298                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3299         if (rss_hf & ETH_RSS_IPV6_EX)
3300                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3301         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3302                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3303         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3304                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3305         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3306                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3307         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3308                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3309         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3310                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3311         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3312 }
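
/*
 * Illustrative sketch (application-side, values assumed): a minimal
 * rte_eth_rss_conf enabling IPv4 and IPv4/TCP hashing with the default
 * key, as consumed by ixgbe_hw_rss_hash_set() above:
 *
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = NULL,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *
 * With rss_key == NULL the RSSRK registers are left untouched here;
 * ixgbe_rss_configure() below falls back to rss_intel_key in that case.
 */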
3313
3314 int
3315 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3316                           struct rte_eth_rss_conf *rss_conf)
3317 {
3318         struct ixgbe_hw *hw;
3319         uint32_t mrqc;
3320         uint64_t rss_hf;
3321         uint32_t mrqc_reg;
3322
3323         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3324
3325         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3326                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3327                         "NIC.");
3328                 return -ENOTSUP;
3329         }
3330         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3331
3332         /*
3333          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3334          *     "RSS enabling cannot be done dynamically while it must be
3335          *      preceded by a software reset"
3336          * Before changing anything, first check that the update RSS operation
3337          * does not attempt to disable RSS, if RSS was enabled at
3338          * initialization time, or does not attempt to enable RSS, if RSS was
3339          * disabled at initialization time.
3340          */
3341         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3342         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3343         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3344                 if (rss_hf != 0) /* Enable RSS */
3345                         return -(EINVAL);
3346                 return 0; /* Nothing to do */
3347         }
3348         /* RSS enabled */
3349         if (rss_hf == 0) /* Disable RSS */
3350                 return -(EINVAL);
3351         ixgbe_hw_rss_hash_set(hw, rss_conf);
3352         return 0;
3353 }
3354
3355 int
3356 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3357                             struct rte_eth_rss_conf *rss_conf)
3358 {
3359         struct ixgbe_hw *hw;
3360         uint8_t *hash_key;
3361         uint32_t mrqc;
3362         uint32_t rss_key;
3363         uint64_t rss_hf;
3364         uint16_t i;
3365         uint32_t mrqc_reg;
3366         uint32_t rssrk_reg;
3367
3368         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3369         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3370         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3371         hash_key = rss_conf->rss_key;
3372         if (hash_key != NULL) {
3373                 /* Return RSS hash key */
3374                 for (i = 0; i < 10; i++) {
3375                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3376                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3377                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3378                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3379                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3380                 }
3381         }
3382
3383         /* Get RSS functions configured in MRQC register */
3384         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3385         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3386                 rss_conf->rss_hf = 0;
3387                 return 0;
3388         }
3389         rss_hf = 0;
3390         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3391                 rss_hf |= ETH_RSS_IPV4;
3392         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3393                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3394         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3395                 rss_hf |= ETH_RSS_IPV6;
3396         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3397                 rss_hf |= ETH_RSS_IPV6_EX;
3398         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3399                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3400         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3401                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3402         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3403                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3404         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3405                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3406         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3407                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3408         rss_conf->rss_hf = rss_hf;
3409         return 0;
3410 }
3411
3412 static void
3413 ixgbe_rss_configure(struct rte_eth_dev *dev)
3414 {
3415         struct rte_eth_rss_conf rss_conf;
3416         struct ixgbe_hw *hw;
3417         uint32_t reta;
3418         uint16_t i;
3419         uint16_t j;
3420         uint16_t sp_reta_size;
3421         uint32_t reta_reg;
3422
3423         PMD_INIT_FUNC_TRACE();
3424         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3425
3426         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3427
3428         /*
3429          * Fill in redirection table
3430          * The byte-swap is needed because NIC registers are in
3431          * little-endian order.
3432          */
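        /*
         * Worked example: with 4 Rx queues the first four entries are
         * 0, 1, 2, 3, so after four iterations reta holds 0x00010203 and
         * rte_bswap32() writes 0x03020100, i.e. entry i lands in byte i
         * of the little-endian register.
         */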
3433         reta = 0;
3434         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3435                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3436
3437                 if (j == dev->data->nb_rx_queues)
3438                         j = 0;
3439                 reta = (reta << 8) | j;
3440                 if ((i & 3) == 3)
3441                         IXGBE_WRITE_REG(hw, reta_reg,
3442                                         rte_bswap32(reta));
3443         }
3444
3445         /*
3446          * Configure the RSS key and the RSS protocols used to compute
3447          * the RSS hash of input packets.
3448          */
3449         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3450         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3451                 ixgbe_rss_disable(dev);
3452                 return;
3453         }
3454         if (rss_conf.rss_key == NULL)
3455                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3456         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3457 }
3458
3459 #define NUM_VFTA_REGISTERS 128
3460 #define NIC_RX_BUFFER_SIZE 0x200
3461 #define X550_RX_BUFFER_SIZE 0x180
3462
3463 static void
3464 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3465 {
3466         struct rte_eth_vmdq_dcb_conf *cfg;
3467         struct ixgbe_hw *hw;
3468         enum rte_eth_nb_pools num_pools;
3469         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3470         uint16_t pbsize;
3471         uint8_t nb_tcs; /* number of traffic classes */
3472         int i;
3473
3474         PMD_INIT_FUNC_TRACE();
3475         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3476         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3477         num_pools = cfg->nb_queue_pools;
3478         /* Check we have a valid number of pools */
3479         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3480                 ixgbe_rss_disable(dev);
3481                 return;
3482         }
3483         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3484         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3485
3486         /*
3487          * RXPBSIZE
3488          * split rx buffer up into sections, each for 1 traffic class
3489          */
3490         switch (hw->mac.type) {
3491         case ixgbe_mac_X550:
3492         case ixgbe_mac_X550EM_x:
3493         case ixgbe_mac_X550EM_a:
3494                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3495                 break;
3496         default:
3497                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3498                 break;
3499         }
3500         for (i = 0; i < nb_tcs; i++) {
3501                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3502
3503                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3504                 /* clear 10 bits. */
3505                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3506                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3507         }
3508         /* zero alloc all unused TCs */
3509         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3510                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3511
3512                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3513                 /* clear 10 bits. */
3514                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3515         }
3516
3517         /* MRQC: enable vmdq and dcb */
3518         mrqc = (num_pools == ETH_16_POOLS) ?
3519                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3520         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3521
3522         /* PFVTCTL: turn on virtualisation and set the default pool */
3523         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3524         if (cfg->enable_default_pool) {
3525                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3526         } else {
3527                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3528         }
3529
3530         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3531
3532         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3533         queue_mapping = 0;
3534         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3535                 /*
3536                  * mapping is done with 3 bits per priority,
3537                  * so shift by i*3 each time
3538                  */
3539                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3540
3541         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3542
3543         /* RTRPCS: DCB related */
3544         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3545
3546         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3547         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3548         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3549         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3550
3551         /* VFTA - enable all vlan filters */
3552         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3553                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3554         }
3555
3556         /* VFRE: pool enabling for receive - 16 or 32 */
3557         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3558                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3559
3560         /*
3561          * MPSAR - allow pools to read specific mac addresses
3562          * In this case, all pools should be able to read from mac addr 0
3563          */
3564         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3565         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3566
3567         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3568         for (i = 0; i < cfg->nb_pool_maps; i++) {
3569                 /* set vlan id in VF register and set the valid bit */
3570                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3571                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3572                 /*
3573                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3574                  * pools, we only need to use the first half of the register
3575                  * i.e. bits 0-31
3576                  */
3577                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3578         }
3579 }
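
/*
 * Illustrative sketch (application-side, values assumed): a VMDq+DCB Rx
 * configuration with 16 pools and one VLAN-to-pool mapping, as consumed
 * by ixgbe_vmdq_dcb_configure() above:
 *
 *     struct rte_eth_vmdq_dcb_conf *cfg =
 *             &port_conf.rx_adv_conf.vmdq_dcb_conf;
 *     cfg->nb_queue_pools = ETH_16_POOLS;
 *     cfg->enable_default_pool = 0;
 *     cfg->nb_pool_maps = 1;
 *     cfg->pool_map[0].vlan_id = 100;
 *     cfg->pool_map[0].pools = 1 << 0;        (VLAN 100 -> pool 0)
 *     for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
 *             cfg->dcb_tc[i] = i;             (priority i -> TC i)
 */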
3580
3581 /**
3582  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3583  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3584  * @dcb_config: pointer to ixgbe_dcb_config structure
3585  */
3586 static void
3587 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3588                        struct ixgbe_dcb_config *dcb_config)
3589 {
3590         uint32_t reg;
3591         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3592
3593         PMD_INIT_FUNC_TRACE();
3594         if (hw->mac.type != ixgbe_mac_82598EB) {
3595                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3596                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3597                 reg |= IXGBE_RTTDCS_ARBDIS;
3598                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3599
3600                 /* Enable DCB for Tx with 8 TCs */
3601                 if (dcb_config->num_tcs.pg_tcs == 8) {
3602                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3603                 } else {
3604                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3605                 }
3606                 if (dcb_config->vt_mode)
3607                         reg |= IXGBE_MTQC_VT_ENA;
3608                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3609
3610                 /* Enable the Tx desc arbiter */
3611                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3612                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3613                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3614
3615                 /* Enable Security TX Buffer IFG for DCB */
3616                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3617                 reg |= IXGBE_SECTX_DCB;
3618                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3619         }
3620 }
3621
3622 /**
3623  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3624  * @dev: pointer to rte_eth_dev structure
3625  * @dcb_config: pointer to ixgbe_dcb_config structure
3626  */
3627 static void
3628 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3629                         struct ixgbe_dcb_config *dcb_config)
3630 {
3631         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3632                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3633         struct ixgbe_hw *hw =
3634                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3635
3636         PMD_INIT_FUNC_TRACE();
3637         if (hw->mac.type != ixgbe_mac_82598EB)
3638                 /* PF VF Transmit Enable */
3639                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3640                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3641
3642         /* Configure general DCB TX parameters */
3643         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3644 }
3645
3646 static void
3647 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3648                         struct ixgbe_dcb_config *dcb_config)
3649 {
3650         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3651                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3652         struct ixgbe_dcb_tc_config *tc;
3653         uint8_t i, j;
3654
3655         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3656         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3657                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3658                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3659         } else {
3660                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3661                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3662         }
3663
3664         /* Initialize User Priority to Traffic Class mapping */
3665         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3666                 tc = &dcb_config->tc_config[j];
3667                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3668         }
3669
3670         /* User Priority to Traffic Class mapping */
3671         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3672                 j = vmdq_rx_conf->dcb_tc[i];
3673                 tc = &dcb_config->tc_config[j];
3674                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3675                                                 (uint8_t)(1 << i);
3676         }
3677 }
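
/*
 * Worked example for the user-priority to traffic-class mapping above
 * (illustrative dcb_tc values): with dcb_tc = {0, 0, 1, 1, 2, 2, 3, 3},
 * priorities 2 and 3 both map to TC 1, so
 * tc_config[1].up_to_tc_bitmap ends up as (1 << 2) | (1 << 3) = 0x0C.
 * The Tx variants below build the same bitmaps for the Tx path.
 */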
3678
3679 static void
3680 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3681                         struct ixgbe_dcb_config *dcb_config)
3682 {
3683         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3684                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3685         struct ixgbe_dcb_tc_config *tc;
3686         uint8_t i, j;
3687
3688         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3689         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3690                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3691                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3692         } else {
3693                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3694                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3695         }
3696
3697         /* Initialize User Priority to Traffic Class mapping */
3698         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3699                 tc = &dcb_config->tc_config[j];
3700                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3701         }
3702
3703         /* User Priority to Traffic Class mapping */
3704         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3705                 j = vmdq_tx_conf->dcb_tc[i];
3706                 tc = &dcb_config->tc_config[j];
3707                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3708                                                 (uint8_t)(1 << i);
3709         }
3710 }
3711
3712 static void
3713 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3714                 struct ixgbe_dcb_config *dcb_config)
3715 {
3716         struct rte_eth_dcb_rx_conf *rx_conf =
3717                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3718         struct ixgbe_dcb_tc_config *tc;
3719         uint8_t i, j;
3720
3721         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3722         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3723
3724         /* Initialize User Priority to Traffic Class mapping */
3725         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3726                 tc = &dcb_config->tc_config[j];
3727                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3728         }
3729
3730         /* User Priority to Traffic Class mapping */
3731         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3732                 j = rx_conf->dcb_tc[i];
3733                 tc = &dcb_config->tc_config[j];
3734                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3735                                                 (uint8_t)(1 << i);
3736         }
3737 }
3738
3739 static void
3740 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3741                 struct ixgbe_dcb_config *dcb_config)
3742 {
3743         struct rte_eth_dcb_tx_conf *tx_conf =
3744                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3745         struct ixgbe_dcb_tc_config *tc;
3746         uint8_t i, j;
3747
3748         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3749         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3750
3751         /* Initialize User Priority to Traffic Class mapping */
3752         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3753                 tc = &dcb_config->tc_config[j];
3754                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3755         }
3756
3757         /* User Priority to Traffic Class mapping */
3758         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3759                 j = tx_conf->dcb_tc[i];
3760                 tc = &dcb_config->tc_config[j];
3761                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3762                                                 (uint8_t)(1 << i);
3763         }
3764 }
3765
3766 /**
3767  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3768  * @dev: pointer to eth_dev structure
3769  * @dcb_config: pointer to ixgbe_dcb_config structure
3770  */
3771 static void
3772 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3773                        struct ixgbe_dcb_config *dcb_config)
3774 {
3775         uint32_t reg;
3776         uint32_t vlanctrl;
3777         uint8_t i;
3778         uint32_t q;
3779         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3780
3781         PMD_INIT_FUNC_TRACE();
3782         /*
3783          * Disable the arbiter before changing parameters
3784          * (always enable recycle mode; WSP)
3785          */
3786         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3787         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3788
3789         if (hw->mac.type != ixgbe_mac_82598EB) {
3790                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3791                 if (dcb_config->num_tcs.pg_tcs == 4) {
3792                         if (dcb_config->vt_mode)
3793                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3794                                         IXGBE_MRQC_VMDQRT4TCEN;
3795                         else {
3796                                 /* regardless of whether the mode is DCB or
3797                                  * DCB_RSS, set MRQE to RSSXTCEN. RSS is controlled
3798                                  * by RSS_FIELD
3799                                  */
3800                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3801                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3802                                         IXGBE_MRQC_RTRSS4TCEN;
3803                         }
3804                 }
3805                 if (dcb_config->num_tcs.pg_tcs == 8) {
3806                         if (dcb_config->vt_mode)
3807                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3808                                         IXGBE_MRQC_VMDQRT8TCEN;
3809                         else {
3810                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3811                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3812                                         IXGBE_MRQC_RTRSS8TCEN;
3813                         }
3814                 }
3815
3816                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3817
3818                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3819                         /* Disable drop for all queues in VMDQ mode */
3820                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3821                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3822                                                 (IXGBE_QDE_WRITE |
3823                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3824                 } else {
3825                         /* Enable drop for all queues in SRIOV mode */
3826                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3827                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3828                                                 (IXGBE_QDE_WRITE |
3829                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3830                                                  IXGBE_QDE_ENABLE));
3831                 }
3832         }
3833
3834         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3835         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3836         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3837         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3838
3839         /* VFTA - enable all vlan filters */
3840         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3841                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3842         }
3843
3844         /*
3845          * Configure Rx packet plane (recycle mode; WSP) and
3846          * enable arbiter
3847          */
3848         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3849         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3850 }
3851
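/**
 * ixgbe_dcb_hw_arbite_rx_config - Program the Rx packet plane arbiter
 * @hw: pointer to hardware structure
 * @refill: refill credits per traffic class
 * @max: max credits per traffic class
 * @bwg_id: bandwidth group id per traffic class
 * @tsa: transmission selection algorithm per traffic class
 * @map: user priority to traffic class map
 *
 * Dispatches to the 82598 or 82599-class base driver helper; unknown MAC
 * types are silently ignored.
 */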
3852 static void
3853 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3854                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3855 {
3856         switch (hw->mac.type) {
3857         case ixgbe_mac_82598EB:
3858                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3859                 break;
3860         case ixgbe_mac_82599EB:
3861         case ixgbe_mac_X540:
3862         case ixgbe_mac_X550:
3863         case ixgbe_mac_X550EM_x:
3864         case ixgbe_mac_X550EM_a:
3865                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3866                                                   tsa, map);
3867                 break;
3868         default:
3869                 break;
3870         }
3871 }
3872
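/**
 * ixgbe_dcb_hw_arbite_tx_config - Program the Tx descriptor and data plane
 * arbiters
 * @hw: pointer to hardware structure
 * @refill: refill credits per traffic class
 * @max: max credits per traffic class
 * @bwg_id: bandwidth group id per traffic class
 * @tsa: transmission selection algorithm per traffic class
 * @map: user priority to traffic class map
 *
 * Dispatches to the 82598 or 82599-class base driver helpers; unknown MAC
 * types are silently ignored.
 */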
3873 static void
3874 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3875                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3876 {
3877         switch (hw->mac.type) {
3878         case ixgbe_mac_82598EB:
3879                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3880                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3881                 break;
3882         case ixgbe_mac_82599EB:
3883         case ixgbe_mac_X540:
3884         case ixgbe_mac_X550:
3885         case ixgbe_mac_X550EM_x:
3886         case ixgbe_mac_X550EM_a:
3887                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3888                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3889                 break;
3890         default:
3891                 break;
3892         }
3893 }
3894
3895 #define DCB_RX_CONFIG  1
3896 #define DCB_TX_CONFIG  1
3897 #define DCB_TX_PB      1024
3898 /**
3899  * ixgbe_dcb_hw_configure - Enable DCB and configure
3900  * general DCB in VT mode and non-VT mode parameters
3901  * @dev: pointer to rte_eth_dev structure
3902  * @dcb_config: pointer to ixgbe_dcb_config structure
3903  */
3904 static int
3905 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3906                         struct ixgbe_dcb_config *dcb_config)
3907 {
3908         int     ret = 0;
3909         uint8_t i, pfc_en, nb_tcs;
3910         uint16_t pbsize, rx_buffer_size;
3911         uint8_t config_dcb_rx = 0;
3912         uint8_t config_dcb_tx = 0;
3913         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3914         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3915         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3916         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3917         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3918         struct ixgbe_dcb_tc_config *tc;
3919         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3920         struct ixgbe_hw *hw =
3921                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3922         struct ixgbe_bw_conf *bw_conf =
3923                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3924
3925         switch (dev->data->dev_conf.rxmode.mq_mode) {
3926         case ETH_MQ_RX_VMDQ_DCB:
3927                 dcb_config->vt_mode = true;
3928                 if (hw->mac.type != ixgbe_mac_82598EB) {
3929                         config_dcb_rx = DCB_RX_CONFIG;
3930                         /*
3931                          * get DCB and VT Rx configuration parameters
3932                          * from rte_eth_conf
3933                          */
3934                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3935                         /* Configure general VMDQ and DCB Rx parameters */
3936                         ixgbe_vmdq_dcb_configure(dev);
3937                 }
3938                 break;
3939         case ETH_MQ_RX_DCB:
3940         case ETH_MQ_RX_DCB_RSS:
3941                 dcb_config->vt_mode = false;
3942                 config_dcb_rx = DCB_RX_CONFIG;
3943                 /* Get DCB Rx configuration parameters from rte_eth_conf */
3944                 ixgbe_dcb_rx_config(dev, dcb_config);
3945                 /* Configure general DCB Rx parameters */
3946                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3947                 break;
3948         default:
3949                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3950                 break;
3951         }
3952         switch (dev->data->dev_conf.txmode.mq_mode) {
3953         case ETH_MQ_TX_VMDQ_DCB:
3954                 dcb_config->vt_mode = true;
3955                 config_dcb_tx = DCB_TX_CONFIG;
3956                 /* get DCB and VT TX configuration parameters
3957                  * from rte_eth_conf
3958                  */
3959                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3960                 /* Configure general VMDQ and DCB TX parameters */
3961                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3962                 break;
3963
3964         case ETH_MQ_TX_DCB:
3965                 dcb_config->vt_mode = false;
3966                 config_dcb_tx = DCB_TX_CONFIG;
3967                 /* Get DCB TX configuration parameters from rte_eth_conf */
3968                 ixgbe_dcb_tx_config(dev, dcb_config);
3969                 /* Configure general DCB TX parameters */
3970                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3971                 break;
3972         default:
3973                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3974                 break;
3975         }
3976
3977         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3978         /* Unpack map */
3979         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3980         if (nb_tcs == ETH_4_TCS) {
3981                 /* Avoid un-configured priority mapping to TC0 */
3982                 uint8_t j = 4;
3983                 uint8_t mask = 0xFF;
3984
3985                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3986                         mask = (uint8_t)(mask & (~(1 << map[i])));
3987                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3988                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3989                                 map[j++] = i;
3990                         mask >>= 1;
3991                 }
3992                 /* Re-configure 4 TCs BW */
3993                 for (i = 0; i < nb_tcs; i++) {
3994                         tc = &dcb_config->tc_config[i];
3995                         if (bw_conf->tc_num != nb_tcs)
3996                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3997                                         (uint8_t)(100 / nb_tcs);
3998                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3999                                                 (uint8_t)(100 / nb_tcs);
4000                 }
4001                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4002                         tc = &dcb_config->tc_config[i];
4003                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4004                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4005                 }
4006         } else {
4007                 /* Re-configure 8 TCs BW */
4008                 for (i = 0; i < nb_tcs; i++) {
4009                         tc = &dcb_config->tc_config[i];
4010                         if (bw_conf->tc_num != nb_tcs)
4011                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4012                                         (uint8_t)(100 / nb_tcs + (i & 1));
4013                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4014                                 (uint8_t)(100 / nb_tcs + (i & 1));
4015                 }
4016         }
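        /*
         * Note: with 8 TCs, 100 / 8 leaves a remainder of 4; the (i & 1)
         * term above gives four TCs 12% and four TCs 13%, so the eight
         * shares sum to 100.
         */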
4017
4018         switch (hw->mac.type) {
4019         case ixgbe_mac_X550:
4020         case ixgbe_mac_X550EM_x:
4021         case ixgbe_mac_X550EM_a:
4022                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4023                 break;
4024         default:
4025                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4026                 break;
4027         }
4028
4029         if (config_dcb_rx) {
4030                 /* Set RX buffer size */
4031                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4032                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4033
4034                 for (i = 0; i < nb_tcs; i++) {
4035                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4036                 }
4037                 /* zero alloc all unused TCs */
4038                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4039                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4040                 }
4041         }
4042         if (config_dcb_tx) {
4043                 /* Only an equally distributed Tx packet buffer
4044                  * strategy is supported.
4045                  */
4046                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4047                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4048
4049                 for (i = 0; i < nb_tcs; i++) {
4050                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4051                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4052                 }
4053                 /* Clear unused TCs, if any, to zero buffer size*/
4054                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4055                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4056                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4057                 }
4058         }
4059
4060         /* Calculate traffic class credits */
4061         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4062                                 IXGBE_DCB_TX_CONFIG);
4063         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4064                                 IXGBE_DCB_RX_CONFIG);
4065
4066         if (config_dcb_rx) {
4067                 /* Unpack CEE standard containers */
4068                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4069                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4070                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4071                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4072                 /* Configure PG(ETS) RX */
4073                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4074         }
4075
4076         if (config_dcb_tx) {
4077                 /* Unpack CEE standard containers */
4078                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4079                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4080                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4081                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4082                 /* Configure PG(ETS) TX */
4083                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4084         }
4085
4086         /* Configure queue statistics registers */
4087         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4088
4089         /* Check if the PFC is supported */
4090         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4091                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4092                 for (i = 0; i < nb_tcs; i++) {
4093                         /*
4094                          * If the TC count is 8 and the default high_water is 48,
4095                          * the low_water defaults to 16.
4096                          */
4097                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4098                         hw->fc.low_water[i] = pbsize / 4;
4099                         /* Enable pfc for this TC */
4100                         tc = &dcb_config->tc_config[i];
4101                         tc->pfc = ixgbe_dcb_pfc_enabled;
4102                 }
4103                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4104                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4105                         pfc_en &= 0x0F;
4106                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4107         }
4108
4109         return ret;
4110 }
4111
4112 /**
4113  * ixgbe_configure_dcb - Configure DCB  Hardware
4114  * @dev: pointer to rte_eth_dev
4115  */
4116 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4117 {
4118         struct ixgbe_dcb_config *dcb_cfg =
4119                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4120         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4121
4122         PMD_INIT_FUNC_TRACE();
4123
4124         /* check support mq_mode for DCB */
4125         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4126             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4127             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4128                 return;
4129
4130         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4131                 return;
4132
4133         /* Configure DCB hardware */
4134         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4135 }
4136
4137 /*
4138  * VMDq is only supported on 10 GbE NICs.
4139  */
4140 static void
4141 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4142 {
4143         struct rte_eth_vmdq_rx_conf *cfg;
4144         struct ixgbe_hw *hw;
4145         enum rte_eth_nb_pools num_pools;
4146         uint32_t mrqc, vt_ctl, vlanctrl;
4147         uint32_t vmolr = 0;
4148         int i;
4149
4150         PMD_INIT_FUNC_TRACE();
4151         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4152         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4153         num_pools = cfg->nb_queue_pools;
4154
4155         ixgbe_rss_disable(dev);
4156
4157         /* MRQC: enable vmdq */
4158         mrqc = IXGBE_MRQC_VMDQEN;
4159         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4160
4161         /* PFVTCTL: turn on virtualisation and set the default pool */
4162         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4163         if (cfg->enable_default_pool)
4164                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4165         else
4166                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4167
4168         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4169
4170         for (i = 0; i < (int)num_pools; i++) {
4171                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4172                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4173         }
4174
4175         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4176         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4177         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4178         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4179
4180         /* VFTA - enable all vlan filters */
4181         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4182                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4183
4184         /* VFRE: pool enabling for receive - 64 */
4185         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4186         if (num_pools == ETH_64_POOLS)
4187                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4188
4189         /*
4190          * MPSAR - allow pools to read specific mac addresses
4191          * In this case, all pools should be able to read from mac addr 0
4192          */
4193         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4194         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4195
4196         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4197         for (i = 0; i < cfg->nb_pool_maps; i++) {
4198                 /* set vlan id in VF register and set the valid bit */
4199                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4200                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4201                 /*
4202                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4203                  * pools, we only need to use the first half of the register
4204                  * i.e. bits 0-31
4205                  */
4206                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4207                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4208                                         (cfg->pool_map[i].pools & UINT32_MAX));
4209                 else
4210                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4211                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4212
4213         }
4214
4215         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4216         if (cfg->enable_loop_back) {
4217                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4218                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4219                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4220         }
4221
4222         IXGBE_WRITE_FLUSH(hw);
4223 }
4224
4225 /*
4226  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4227  * @hw: pointer to hardware structure
4228  */
4229 static void
4230 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4231 {
4232         uint32_t reg;
4233         uint32_t q;
4234
4235         PMD_INIT_FUNC_TRACE();
4236         /* PF VF Transmit Enable */
4237         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4238         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4239
4240         /* Disable the Tx desc arbiter so that MTQC can be changed */
4241         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4242         reg |= IXGBE_RTTDCS_ARBDIS;
4243         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4244
4245         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4246         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4247
4248         /* Disable drop for all queues */
4249         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4250                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4251                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4252
4253         /* Enable the Tx desc arbiter */
4254         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4255         reg &= ~IXGBE_RTTDCS_ARBDIS;
4256         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4257
4258         IXGBE_WRITE_FLUSH(hw);
4259 }
4260
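/**
 * ixgbe_alloc_rx_queue_mbufs - Populate an Rx descriptor ring with mbufs
 * @rxq: pointer to the Rx queue structure
 *
 * Allocates one mbuf from the queue's mempool per descriptor, programs its
 * IOVA into the descriptor (hdr_addr is zeroed) and records it in the
 * software ring. Returns 0 on success or -ENOMEM if an allocation fails.
 */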
4261 static int __attribute__((cold))
4262 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4263 {
4264         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4265         uint64_t dma_addr;
4266         unsigned int i;
4267
4268         /* Initialize software ring entries */
4269         for (i = 0; i < rxq->nb_rx_desc; i++) {
4270                 volatile union ixgbe_adv_rx_desc *rxd;
4271                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4272
4273                 if (mbuf == NULL) {
4274                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4275                                      (unsigned) rxq->queue_id);
4276                         return -ENOMEM;
4277                 }
4278
4279                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4280                 mbuf->port = rxq->port_id;
4281
4282                 dma_addr =
4283                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4284                 rxd = &rxq->rx_ring[i];
4285                 rxd->read.hdr_addr = 0;
4286                 rxd->read.pkt_addr = dma_addr;
4287                 rxe[i].mbuf = mbuf;
4288         }
4289
4290         return 0;
4291 }
4292
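/**
 * ixgbe_config_vf_rss - Enable RSS alongside SR-IOV pools
 * @dev: pointer to rte_eth_dev structure
 *
 * Applies the regular RSS configuration and then selects the VMDq+RSS MRQE
 * encoding in MRQC according to the number of active SR-IOV pools
 * (64 or 32). Any other pool count is rejected with -EINVAL.
 */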
4293 static int
4294 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4295 {
4296         struct ixgbe_hw *hw;
4297         uint32_t mrqc;
4298
4299         ixgbe_rss_configure(dev);
4300
4301         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4302
4303         /* MRQC: enable VF RSS */
4304         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4305         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4306         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4307         case ETH_64_POOLS:
4308                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4309                 break;
4310
4311         case ETH_32_POOLS:
4312                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4313                 break;
4314
4315         default:
4316                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4317                 return -EINVAL;
4318         }
4319
4320         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4321
4322         return 0;
4323 }
4324
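/**
 * ixgbe_config_vf_default - Default multi-queue setting with SR-IOV active
 * @dev: pointer to rte_eth_dev structure
 *
 * Programs MRQC with the plain VMDq encoding for 64 pools, or the VMDq+TC
 * encodings for 32/16 pools, when no RSS or DCB Rx mode was requested.
 */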
4325 static int
4326 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4327 {
4328         struct ixgbe_hw *hw =
4329                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4330
4331         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4332         case ETH_64_POOLS:
4333                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4334                         IXGBE_MRQC_VMDQEN);
4335                 break;
4336
4337         case ETH_32_POOLS:
4338                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4339                         IXGBE_MRQC_VMDQRT4TCEN);
4340                 break;
4341
4342         case ETH_16_POOLS:
4343                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4344                         IXGBE_MRQC_VMDQRT8TCEN);
4345                 break;
4346         default:
4347                 PMD_INIT_LOG(ERR,
4348                         "invalid pool number in IOV mode");
4349                 break;
4350         }
4351         return 0;
4352 }
4353
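/**
 * ixgbe_dev_mq_rx_configure - Configure Rx multi-queue mode
 * @dev: pointer to rte_eth_dev structure
 *
 * With SR-IOV inactive, selects RSS, VMDq+DCB, plain VMDq or no multi-queue
 * according to rxmode.mq_mode. With SR-IOV active, combines the pools with
 * RSS or DCB where supported and falls back to the default VF configuration
 * otherwise; DCB together with RSS and SR-IOV is rejected. 82598 devices are
 * skipped entirely.
 */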
4354 static int
4355 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4356 {
4357         struct ixgbe_hw *hw =
4358                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4359
4360         if (hw->mac.type == ixgbe_mac_82598EB)
4361                 return 0;
4362
4363         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4364                 /*
4365                  * SRIOV inactive scheme
4366                  * any DCB/RSS w/o VMDq multi-queue setting
4367                  */
4368                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4369                 case ETH_MQ_RX_RSS:
4370                 case ETH_MQ_RX_DCB_RSS:
4371                 case ETH_MQ_RX_VMDQ_RSS:
4372                         ixgbe_rss_configure(dev);
4373                         break;
4374
4375                 case ETH_MQ_RX_VMDQ_DCB:
4376                         ixgbe_vmdq_dcb_configure(dev);
4377                         break;
4378
4379                 case ETH_MQ_RX_VMDQ_ONLY:
4380                         ixgbe_vmdq_rx_hw_configure(dev);
4381                         break;
4382
4383                 case ETH_MQ_RX_NONE:
4384                 default:
4385                         /* if mq_mode is none, disable RSS mode. */
4386                         ixgbe_rss_disable(dev);
4387                         break;
4388                 }
4389         } else {
4390                 /* SRIOV active scheme
4391                  * Support RSS together with SRIOV.
4392                  */
4393                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4394                 case ETH_MQ_RX_RSS:
4395                 case ETH_MQ_RX_VMDQ_RSS:
4396                         ixgbe_config_vf_rss(dev);
4397                         break;
4398                 case ETH_MQ_RX_VMDQ_DCB:
4399                 case ETH_MQ_RX_DCB:
4400                 /* In SRIOV, the configuration is the same as the VMDq case */
4401                         ixgbe_vmdq_dcb_configure(dev);
4402                         break;
4403                 /* DCB/RSS together with SRIOV is not supported */
4404                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4405                 case ETH_MQ_RX_DCB_RSS:
4406                         PMD_INIT_LOG(ERR,
4407                                 "Could not support DCB/RSS with VMDq & SRIOV");
4408                         return -1;
4409                 default:
4410                         ixgbe_config_vf_default(dev);
4411                         break;
4412                 }
4413         }
4414
4415         return 0;
4416 }
4417
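/**
 * ixgbe_dev_mq_tx_configure - Configure Tx multi-queue mode
 * @dev: pointer to rte_eth_dev structure
 *
 * Disables the Tx descriptor arbiter, programs MTQC (a single 64-queue
 * packet buffer or VMDq-only Tx when SR-IOV is inactive, the VT encodings
 * for 64/32/16 pools when it is active) and re-enables the arbiter.
 */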
4418 static int
4419 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4420 {
4421         struct ixgbe_hw *hw =
4422                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4423         uint32_t mtqc;
4424         uint32_t rttdcs;
4425
4426         if (hw->mac.type == ixgbe_mac_82598EB)
4427                 return 0;
4428
4429         /* disable arbiter before setting MTQC */
4430         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4431         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4432         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4433
4434         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4435                 /*
4436                  * SRIOV inactive scheme
4437                  * any DCB w/o VMDq multi-queue setting
4438                  */
4439                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4440                         ixgbe_vmdq_tx_hw_configure(hw);
4441                 else {
4442                         mtqc = IXGBE_MTQC_64Q_1PB;
4443                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4444                 }
4445         } else {
4446                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4447
4448                 /*
4449                  * SRIOV active scheme
4450                  * FIXME: add support for DCB together with VMDq & SRIOV
4451                  */
4452                 case ETH_64_POOLS:
4453                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4454                         break;
4455                 case ETH_32_POOLS:
4456                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4457                         break;
4458                 case ETH_16_POOLS:
4459                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4460                                 IXGBE_MTQC_8TC_8TQ;
4461                         break;
4462                 default:
4463                         mtqc = IXGBE_MTQC_64Q_1PB;
4464                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4465                 }
4466                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4467         }
4468
4469         /* re-enable arbiter */
4470         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4471         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4472
4473         return 0;
4474 }
4475
4476 /**
4477  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4478  *
4479  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4480  * spec rev. 3.0 chapter 8.2.3.8.13.
4481  *
4482  * @pool Memory pool of the Rx queue
4483  */
4484 static inline uint32_t
4485 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4486 {
4487         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4488
4489         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4490         uint16_t maxdesc =
4491                 IPV4_MAX_PKT_LEN /
4492                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4493
4494         if (maxdesc >= 16)
4495                 return IXGBE_RSCCTL_MAXDESC_16;
4496         else if (maxdesc >= 8)
4497                 return IXGBE_RSCCTL_MAXDESC_8;
4498         else if (maxdesc >= 4)
4499                 return IXGBE_RSCCTL_MAXDESC_4;
4500         else
4501                 return IXGBE_RSCCTL_MAXDESC_1;
4502 }
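/*
 * Worked example (assuming a typical mbuf pool with a 2 KB data room left
 * after the headroom, which is not mandated by this driver): maxdesc =
 * 65535 / 2048 = 31, so IXGBE_RSCCTL_MAXDESC_16 is selected.
 */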
4503
4504 /**
4505  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4506  * interrupt
4507  *
4508  * (Taken from FreeBSD tree)
4509  * (yes this is all very magic and confusing :)
4510  *
4511  * @dev port handle
4512  * @entry the register array entry
4513  * @vector the MSIX vector for this queue
4514  * @type RX/TX/MISC
4515  */
4516 static void
4517 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4518 {
4519         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4520         u32 ivar, index;
4521
4522         vector |= IXGBE_IVAR_ALLOC_VAL;
4523
4524         switch (hw->mac.type) {
4525
4526         case ixgbe_mac_82598EB:
4527                 if (type == -1)
4528                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4529                 else
4530                         entry += (type * 64);
4531                 index = (entry >> 2) & 0x1F;
4532                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4533                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4534                 ivar |= (vector << (8 * (entry & 0x3)));
4535                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4536                 break;
4537
4538         case ixgbe_mac_82599EB:
4539         case ixgbe_mac_X540:
4540                 if (type == -1) { /* MISC IVAR */
4541                         index = (entry & 1) * 8;
4542                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4543                         ivar &= ~(0xFF << index);
4544                         ivar |= (vector << index);
4545                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4546                 } else {        /* RX/TX IVARS */
4547                         index = (16 * (entry & 1)) + (8 * type);
4548                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4549                         ivar &= ~(0xFF << index);
4550                         ivar |= (vector << index);
4551                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4552                 }
4553
4554                 break;
4555
4556         default:
4557                 break;
4558         }
4559 }
4560
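/**
 * ixgbe_set_rx_function - Select the Rx burst callback for a port
 * @dev: pointer to rte_eth_dev structure
 *
 * Picks between the LRO, scattered, vector, bulk allocation and single
 * allocation receive paths depending on the device configuration and on
 * whether the vector/bulk-alloc preconditions are met, then records the
 * choice (rx_using_sse and, with RTE_LIBRTE_SECURITY, using_ipsec) in every
 * Rx queue.
 */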
4561 void __attribute__((cold))
4562 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4563 {
4564         uint16_t i, rx_using_sse;
4565         struct ixgbe_adapter *adapter =
4566                 (struct ixgbe_adapter *)dev->data->dev_private;
4567
4568         /*
4569          * In order to allow Vector Rx there are a few configuration
4570          * conditions to be met and Rx Bulk Allocation should be allowed.
4571          */
4572         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4573             !adapter->rx_bulk_alloc_allowed) {
4574                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4575                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4576                                     "not enabled",
4577                              dev->data->port_id);
4578
4579                 adapter->rx_vec_allowed = false;
4580         }
4581
4582         /*
4583          * Initialize the appropriate LRO callback.
4584          *
4585          * If all queues satisfy the bulk allocation preconditions
4586          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use the bulk
4587          * allocation variant. Otherwise use the single allocation version.
4588          */
4589         if (dev->data->lro) {
4590                 if (adapter->rx_bulk_alloc_allowed) {
4591                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4592                                            "allocation version");
4593                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4594                 } else {
4595                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4596                                            "allocation version");
4597                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4598                 }
4599         } else if (dev->data->scattered_rx) {
4600                 /*
4601                  * Set the non-LRO scattered callback: there are Vector and
4602                  * single allocation versions.
4603                  */
4604                 if (adapter->rx_vec_allowed) {
4605                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4606                                             "callback (port=%d).",
4607                                      dev->data->port_id);
4608
4609                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4610                 } else if (adapter->rx_bulk_alloc_allowed) {
4611                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback "
4612                                            "with bulk allocation (port=%d).",
4613                                      dev->data->port_id);
4614                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4615                 } else {
4616                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4617                                             "single allocation) "
4618                                             "Scattered Rx callback "
4619                                             "(port=%d).",
4620                                      dev->data->port_id);
4621
4622                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4623                 }
4624         /*
4625          * Below we set "simple" callbacks according to port/queues parameters.
4626          * If parameters allow we are going to choose between the following
4627          * callbacks:
4628          *    - Vector
4629          *    - Bulk Allocation
4630          *    - Single buffer allocation (the simplest one)
4631          */
4632         } else if (adapter->rx_vec_allowed) {
4633                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4634                                     "burst size is no less than %d (port=%d).",
4635                              RTE_IXGBE_DESCS_PER_LOOP,
4636                              dev->data->port_id);
4637
4638                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4639         } else if (adapter->rx_bulk_alloc_allowed) {
4640                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4641                                     "satisfied. Rx Burst Bulk Alloc function "
4642                                     "will be used on port=%d.",
4643                              dev->data->port_id);
4644
4645                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4646         } else {
4647                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4648                                     "satisfied, or Scattered Rx is requested "
4649                                     "(port=%d).",
4650                              dev->data->port_id);
4651
4652                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4653         }
4654
4655         /* Propagate information about RX function choice through all queues. */
4656
4657         rx_using_sse =
4658                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4659                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4660
4661         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4662                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4663
4664                 rxq->rx_using_sse = rx_using_sse;
4665 #ifdef RTE_LIBRTE_SECURITY
4666                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4667                                 DEV_RX_OFFLOAD_SECURITY);
4668 #endif
4669         }
4670 }
4671
4672 /**
4673  * ixgbe_set_rsc - configure RSC related port HW registers
4674  *
4675  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4676  * of 82599 Spec (x540 configuration is virtually the same).
4677  *
4678  * @dev port handle
4679  *
4680  * Returns 0 in case of success or a non-zero error code
4681  */
4682 static int
4683 ixgbe_set_rsc(struct rte_eth_dev *dev)
4684 {
4685         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4686         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4687         struct rte_eth_dev_info dev_info = { 0 };
4688         bool rsc_capable = false;
4689         uint16_t i;
4690         uint32_t rdrxctl;
4691         uint32_t rfctl;
4692
4693         /* Sanity check */
4694         dev->dev_ops->dev_infos_get(dev, &dev_info);
4695         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4696                 rsc_capable = true;
4697
4698         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4699                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4700                                    "support it");
4701                 return -EINVAL;
4702         }
4703
4704         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4705
4706         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4707              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4708                 /*
4709                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4710                  * 3.0 RSC configuration requires HW CRC stripping being
4711                  * enabled. If user requested both HW CRC stripping off
4712                  * and RSC on - return an error.
4713                  */
4714                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4715                                     "is disabled");
4716                 return -EINVAL;
4717         }
4718
4719         /* RFCTL configuration  */
4720         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4721         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4722                 /*
4723                  * Since NFS packet coalescing is not supported, clear
4724                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4725                  * enabled.
4726                  */
4727                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4728                            IXGBE_RFCTL_NFSR_DIS);
4729         else
4730                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4731         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4732
4733         /* If LRO hasn't been requested - we are done here. */
4734         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4735                 return 0;
4736
4737         /* Set RDRXCTL.RSCACKC bit */
4738         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4739         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4740         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4741
4742         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4743         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4744                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4745                 uint32_t srrctl =
4746                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4747                 uint32_t rscctl =
4748                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4749                 uint32_t psrtype =
4750                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4751                 uint32_t eitr =
4752                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4753
4754                 /*
4755                  * ixgbe PMD doesn't support header-split at the moment.
4756                  *
4757                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4758                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4759                  * should be configured even if header split is not
4760                  * enabled. We will configure it to 128 bytes following the
4761                  * recommendation in the spec.
4762                  */
4763                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4764                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4765                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4766
4767                 /*
4768                  * TODO: Consider setting the Receive Descriptor Minimum
4769                  * Threshold Size for an RSC case. This is not an obviously
4770                  * beneficiary option but the one worth considering...
4771                  * beneficial option, but one worth considering...
4772
4773                 rscctl |= IXGBE_RSCCTL_RSCEN;
4774                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4775                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4776
4777                 /*
4778                  * RSC: Set ITR interval corresponding to 2K ints/s.
4779                  *
4780                  * Full-sized RSC aggregations for a 10Gb/s link will
4781                  * arrive at about 20K aggregation/s rate.
4782                  *
4783                  * A 2K ints/s rate will cause only 10% of the
4784                  * aggregations to be closed due to interrupt timer
4785                  * expiration in the streaming-at-wire-speed case.
4786                  *
4787                  * For a sparse streaming case this setting will yield
4788                  * at most 500us latency for a single RSC aggregation.
4789                  */
4790                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4791                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4792                 eitr |= IXGBE_EITR_CNT_WDIS;
4793
4794                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4795                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4796                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4797                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4798
4799                 /*
4800                  * RSC requires the mapping of the queue to the
4801                  * interrupt vector.
4802                  */
4803                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4804         }
4805
4806         dev->data->lro = 1;
4807
4808         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4809
4810         return 0;
4811 }
4812
4813 /*
4814  * Initializes Receive Unit.
4815  */
4816 int __attribute__((cold))
4817 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4818 {
4819         struct ixgbe_hw     *hw;
4820         struct ixgbe_rx_queue *rxq;
4821         uint64_t bus_addr;
4822         uint32_t rxctrl;
4823         uint32_t fctrl;
4824         uint32_t hlreg0;
4825         uint32_t maxfrs;
4826         uint32_t srrctl;
4827         uint32_t rdrxctl;
4828         uint32_t rxcsum;
4829         uint16_t buf_size;
4830         uint16_t i;
4831         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4832         int rc;
4833
4834         PMD_INIT_FUNC_TRACE();
4835         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4836
4837         /*
4838          * Make sure receives are disabled while setting
4839          * up the RX context (registers, descriptor rings, etc.).
4840          */
4841         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4842         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4843
4844         /* Enable receipt of broadcast frames */
4845         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4846         fctrl |= IXGBE_FCTRL_BAM;
4847         fctrl |= IXGBE_FCTRL_DPF;
4848         fctrl |= IXGBE_FCTRL_PMCF;
4849         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4850
4851         /*
4852          * Configure CRC stripping, if any.
4853          */
4854         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4855         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4856                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4857         else
4858                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4859
4860         /*
4861          * Configure jumbo frame support, if any.
4862          */
4863         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
4864                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4865                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4866                 maxfrs &= 0x0000FFFF;
4867                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4868                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4869         } else
4870                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4871
4872         /*
4873          * If loopback mode is configured for 82599, set LPBK bit.
4874          */
4875         if (hw->mac.type == ixgbe_mac_82599EB &&
4876                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4877                 hlreg0 |= IXGBE_HLREG0_LPBK;
4878         else
4879                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4880
4881         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4882
4883         /*
4884          * Assume no header split and no VLAN strip support
4885          * on any Rx queue first.
4886          */
4887         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
4888         /* Setup RX queues */
4889         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4890                 rxq = dev->data->rx_queues[i];
4891
4892                 /*
4893                  * Reset crc_len in case it was changed after queue setup by a
4894                  * call to configure.
4895                  */
4896                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4897                         rxq->crc_len = ETHER_CRC_LEN;
4898                 else
4899                         rxq->crc_len = 0;
4900
4901                 /* Setup the Base and Length of the Rx Descriptor Rings */
4902                 bus_addr = rxq->rx_ring_phys_addr;
4903                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4904                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4905                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4906                                 (uint32_t)(bus_addr >> 32));
4907                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4908                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4909                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4910                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4911
4912                 /* Configure the SRRCTL register */
4913                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4914
4915                 /* Set if packets are dropped when no descriptors available */
4916                 if (rxq->drop_en)
4917                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4918
4919                 /*
4920                  * Configure the RX buffer size in the BSIZEPACKET field of
4921                  * the SRRCTL register of the queue.
4922                  * The value is in 1 KB resolution. Valid values can be from
4923                  * 1 KB to 16 KB.
4924                  */
4925                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4926                         RTE_PKTMBUF_HEADROOM);
4927                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4928                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4929
4930                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4931
4932                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4933                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4934
4935                 /* Add dual VLAN tag length to support dual VLAN */
4936                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4937                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4938                         dev->data->scattered_rx = 1;
4939                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
4940                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
4941         }
4942
4943         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
4944                 dev->data->scattered_rx = 1;
4945
4946         /*
4947          * Device configured with multiple RX queues.
4948          */
4949         ixgbe_dev_mq_rx_configure(dev);
4950
4951         /*
4952          * Setup the Checksum Register.
4953          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4954          * Enable IP/L4 checksum computation by hardware if requested to do so.
4955          */
4956         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4957         rxcsum |= IXGBE_RXCSUM_PCSD;
4958         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
4959                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4960         else
4961                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4962
4963         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4964
4965         if (hw->mac.type == ixgbe_mac_82599EB ||
4966             hw->mac.type == ixgbe_mac_X540) {
4967                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4968                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
4969                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4970                 else
4971                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4972                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4973                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4974         }
4975
4976         rc = ixgbe_set_rsc(dev);
4977         if (rc)
4978                 return rc;
4979
4980         ixgbe_set_rx_function(dev);
4981
4982         return 0;
4983 }
4984
4985 /*
4986  * Initializes Transmit Unit.
4987  */
4988 void __attribute__((cold))
4989 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4990 {
4991         struct ixgbe_hw     *hw;
4992         struct ixgbe_tx_queue *txq;
4993         uint64_t bus_addr;
4994         uint32_t hlreg0;
4995         uint32_t txctrl;
4996         uint16_t i;
4997
4998         PMD_INIT_FUNC_TRACE();
4999         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5000
5001         /* Enable TX CRC (checksum offload requirement) and hw padding
5002          * (TSO requirement)
5003          */
5004         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5005         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5006         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5007
5008         /* Setup the Base and Length of the Tx Descriptor Rings */
5009         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5010                 txq = dev->data->tx_queues[i];
5011
5012                 bus_addr = txq->tx_ring_phys_addr;
5013                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5014                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5015                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5016                                 (uint32_t)(bus_addr >> 32));
5017                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5018                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5019                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5020                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5021                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5022
5023                 /*
5024                  * Disable Tx Head Writeback RO bit, since this hoses
5025                  * bookkeeping if things aren't delivered in order.
5026                  */
5027                 switch (hw->mac.type) {
5028                 case ixgbe_mac_82598EB:
5029                         txctrl = IXGBE_READ_REG(hw,
5030                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5031                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5032                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5033                                         txctrl);
5034                         break;
5035
5036                 case ixgbe_mac_82599EB:
5037                 case ixgbe_mac_X540:
5038                 case ixgbe_mac_X550:
5039                 case ixgbe_mac_X550EM_x:
5040                 case ixgbe_mac_X550EM_a:
5041                 default:
5042                         txctrl = IXGBE_READ_REG(hw,
5043                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5044                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5045                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5046                                         txctrl);
5047                         break;
5048                 }
5049         }
5050
5051         /* Device configured with multiple TX queues. */
5052         ixgbe_dev_mq_tx_configure(dev);
5053 }
5054
5055 /*
5056  * Set up link for 82599 loopback mode Tx->Rx.
5057  */
5058 static inline void __attribute__((cold))
5059 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5060 {
5061         PMD_INIT_FUNC_TRACE();
5062
5063         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5064                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5065                                 IXGBE_SUCCESS) {
5066                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5067                         /* ignore error */
5068                         return;
5069                 }
5070         }
5071
5072         /* Restart link */
5073         IXGBE_WRITE_REG(hw,
5074                         IXGBE_AUTOC,
5075                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5076         ixgbe_reset_pipeline_82599(hw);
5077
5078         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5079         msec_delay(50);
5080 }
5081
5082
5083 /*
5084  * Start Transmit and Receive Units.
5085  */
5086 int __attribute__((cold))
5087 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5088 {
5089         struct ixgbe_hw     *hw;
5090         struct ixgbe_tx_queue *txq;
5091         struct ixgbe_rx_queue *rxq;
5092         uint32_t txdctl;
5093         uint32_t dmatxctl;
5094         uint32_t rxctrl;
5095         uint16_t i;
5096         int ret = 0;
5097
5098         PMD_INIT_FUNC_TRACE();
5099         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5100
5101         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5102                 txq = dev->data->tx_queues[i];
5103                 /* Setup Transmit Threshold Registers */
5104                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5105                 txdctl |= txq->pthresh & 0x7F;
5106                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5107                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5108                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5109         }
5110
5111         if (hw->mac.type != ixgbe_mac_82598EB) {
5112                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5113                 dmatxctl |= IXGBE_DMATXCTL_TE;
5114                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5115         }
5116
5117         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5118                 txq = dev->data->tx_queues[i];
5119                 if (!txq->tx_deferred_start) {
5120                         ret = ixgbe_dev_tx_queue_start(dev, i);
5121                         if (ret < 0)
5122                                 return ret;
5123                 }
5124         }
5125
5126         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5127                 rxq = dev->data->rx_queues[i];
5128                 if (!rxq->rx_deferred_start) {
5129                         ret = ixgbe_dev_rx_queue_start(dev, i);
5130                         if (ret < 0)
5131                                 return ret;
5132                 }
5133         }
5134
5135         /* Enable Receive engine */
5136         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5137         if (hw->mac.type == ixgbe_mac_82598EB)
5138                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5139         rxctrl |= IXGBE_RXCTRL_RXEN;
5140         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5141
5142         /* If loopback mode is enabled for 82599, set up the link accordingly */
5143         if (hw->mac.type == ixgbe_mac_82599EB &&
5144                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
5145                 ixgbe_setup_loopback_link_82599(hw);
5146
5147 #ifdef RTE_LIBRTE_SECURITY
5148         if ((dev->data->dev_conf.rxmode.offloads &
5149                         DEV_RX_OFFLOAD_SECURITY) ||
5150                 (dev->data->dev_conf.txmode.offloads &
5151                         DEV_TX_OFFLOAD_SECURITY)) {
5152                 ret = ixgbe_crypto_enable_ipsec(dev);
5153                 if (ret != 0) {
5154                         PMD_DRV_LOG(ERR,
5155                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5156                                     ret);
5157                         return ret;
5158                 }
5159         }
5160 #endif
5161
5162         return 0;
5163 }
5164
5165 /*
5166  * Start Receive Units for specified queue.
5167  */
5168 int __attribute__((cold))
5169 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5170 {
5171         struct ixgbe_hw     *hw;
5172         struct ixgbe_rx_queue *rxq;
5173         uint32_t rxdctl;
5174         int poll_ms;
5175
5176         PMD_INIT_FUNC_TRACE();
5177         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5178
5179         rxq = dev->data->rx_queues[rx_queue_id];
5180
5181         /* Allocate buffers for descriptor rings */
5182         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5183                 PMD_INIT_LOG(ERR, "Could not allocate mbufs for queue %d",
5184                              rx_queue_id);
5185                 return -1;
5186         }
5187         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5188         rxdctl |= IXGBE_RXDCTL_ENABLE;
5189         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5190
5191         /* Wait until RX Enable ready */
5192         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5193         do {
5194                 rte_delay_ms(1);
5195                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5196         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5197         if (!poll_ms)
5198                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
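        /*
         * Once the queue is enabled, publish the ring to hardware: the
         * barrier orders the descriptor writes, head starts at 0 and the
         * tail is set to the last descriptor so every buffer is usable.
         */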
5199         rte_wmb();
5200         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5201         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5202         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5203
5204         return 0;
5205 }
5206
5207 /*
5208  * Stop Receive Units for specified queue.
5209  */
5210 int __attribute__((cold))
5211 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5212 {
5213         struct ixgbe_hw     *hw;
5214         struct ixgbe_adapter *adapter =
5215                 (struct ixgbe_adapter *)dev->data->dev_private;
5216         struct ixgbe_rx_queue *rxq;
5217         uint32_t rxdctl;
5218         int poll_ms;
5219
5220         PMD_INIT_FUNC_TRACE();
5221         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5222
5223         rxq = dev->data->rx_queues[rx_queue_id];
5224
5225         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5226         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5227         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5228
5229         /* Wait until RX Enable bit clear */
5230         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5231         do {
5232                 rte_delay_ms(1);
5233                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5234         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5235         if (!poll_ms)
5236                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5237
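        /*
         * Short settling delay so any in-flight reception completes before
         * the queue's mbufs are freed and the ring is reset.
         */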
5238         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5239
5240         ixgbe_rx_queue_release_mbufs(rxq);
5241         ixgbe_reset_rx_queue(adapter, rxq);
5242         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5243
5244         return 0;
5245 }
5246
5247
5248 /*
5249  * Start Transmit Units for specified queue.
5250  */
5251 int __attribute__((cold))
5252 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5253 {
5254         struct ixgbe_hw     *hw;
5255         struct ixgbe_tx_queue *txq;
5256         uint32_t txdctl;
5257         int poll_ms;
5258
5259         PMD_INIT_FUNC_TRACE();
5260         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5261
5262         txq = dev->data->tx_queues[tx_queue_id];
5263         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5264         txdctl |= IXGBE_TXDCTL_ENABLE;
5265         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5266
5267         /* Wait until TX Enable ready */
5268         if (hw->mac.type == ixgbe_mac_82599EB) {
5269                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5270                 do {
5271                         rte_delay_ms(1);
5272                         txdctl = IXGBE_READ_REG(hw,
5273                                 IXGBE_TXDCTL(txq->reg_idx));
5274                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5275                 if (!poll_ms)
5276                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5277                                 tx_queue_id);
5278         }
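        /* Reset the head and tail pointers so the queue starts out empty. */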
5279         rte_wmb();
5280         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5281         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5282         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5283
5284         return 0;
5285 }
5286
5287 /*
5288  * Stop Transmit Units for specified queue.
5289  */
5290 int __attribute__((cold))
5291 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5292 {
5293         struct ixgbe_hw     *hw;
5294         struct ixgbe_tx_queue *txq;
5295         uint32_t txdctl;
5296         uint32_t txtdh, txtdt;
5297         int poll_ms;
5298
5299         PMD_INIT_FUNC_TRACE();
5300         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5301
5302         txq = dev->data->tx_queues[tx_queue_id];
5303
5304         /* Wait until TX queue is empty */
5305         if (hw->mac.type == ixgbe_mac_82599EB) {
5306                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5307                 do {
5308                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5309                         txtdh = IXGBE_READ_REG(hw,
5310                                                IXGBE_TDH(txq->reg_idx));
5311                         txtdt = IXGBE_READ_REG(hw,
5312                                                IXGBE_TDT(txq->reg_idx));
5313                 } while (--poll_ms && (txtdh != txtdt));
5314                 if (!poll_ms)
5315                         PMD_INIT_LOG(ERR,
5316                                 "Tx Queue %d is not empty when stopping.",
5317                                 tx_queue_id);
5318         }
5319
5320         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5321         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5322         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5323
5324         /* Wait until TX Enable bit clear */
5325         if (hw->mac.type == ixgbe_mac_82599EB) {
5326                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5327                 do {
5328                         rte_delay_ms(1);
5329                         txdctl = IXGBE_READ_REG(hw,
5330                                                 IXGBE_TXDCTL(txq->reg_idx));
5331                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5332                 if (!poll_ms)
5333                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5334                                 tx_queue_id);
5335         }
5336
5337         if (txq->ops != NULL) {
5338                 txq->ops->release_mbufs(txq);
5339                 txq->ops->reset(txq);
5340         }
5341         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5342
5343         return 0;
5344 }
5345
5346 void
5347 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5348         struct rte_eth_rxq_info *qinfo)
5349 {
5350         struct ixgbe_rx_queue *rxq;
5351
5352         rxq = dev->data->rx_queues[queue_id];
5353
5354         qinfo->mp = rxq->mb_pool;
5355         qinfo->scattered_rx = dev->data->scattered_rx;
5356         qinfo->nb_desc = rxq->nb_rx_desc;
5357
5358         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5359         qinfo->conf.rx_drop_en = rxq->drop_en;
5360         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5361         qinfo->conf.offloads = rxq->offloads;
5362 }
5363
5364 void
5365 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5366         struct rte_eth_txq_info *qinfo)
5367 {
5368         struct ixgbe_tx_queue *txq;
5369
5370         txq = dev->data->tx_queues[queue_id];
5371
5372         qinfo->nb_desc = txq->nb_tx_desc;
5373
5374         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5375         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5376         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5377
5378         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5379         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5380         qinfo->conf.offloads = txq->offloads;
5381         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5382 }
5383
5384 /*
5385  * [VF] Initializes Receive Unit.
5386  */
5387 int __attribute__((cold))
5388 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5389 {
5390         struct ixgbe_hw     *hw;
5391         struct ixgbe_rx_queue *rxq;
5392         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5393         uint64_t bus_addr;
5394         uint32_t srrctl, psrtype = 0;
5395         uint16_t buf_size;
5396         uint16_t i;
5397         int ret;
5398
5399         PMD_INIT_FUNC_TRACE();
5400         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5401
5402         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5403                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5404                         "it must be a power of 2");
5405                 return -1;
5406         }
5407
5408         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5409                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5410                         "it must be less than or equal to %d",
5411                         hw->mac.max_rx_queues);
5412                 return -1;
5413         }
5414
5415         /*
5416          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5417          * disables VF packet reception if the PF MTU is > 1500.
5418          * This is done to deal with a limitation of the 82599 that forces
5419          * the PF and all VFs to share the same MTU.
5420          * The PF driver then re-enables VF packet reception when the VF
5421          * driver issues an IXGBE_VF_SET_LPE request.
5422          * In the meantime, the VF device cannot be used, even if the VF driver
5423          * and the guest VM network stack are ready to accept packets with a
5424          * size up to the PF MTU.
5425          * As a workaround for this PF behaviour, always call
5426          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5427          * VF packet reception works in all cases.
5428          */
5429         ixgbevf_rlpml_set_vf(hw,
5430                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5431
5432         /*
5433          * Assume no header split and no VLAN strip support
5434          * on any Rx queue first.
5435          */
5436         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5437         /* Setup RX queues */
5438         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5439                 rxq = dev->data->rx_queues[i];
5440
5441                 /* Allocate buffers for descriptor rings */
5442                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5443                 if (ret)
5444                         return ret;
5445
5446                 /* Setup the Base and Length of the Rx Descriptor Rings */
5447                 bus_addr = rxq->rx_ring_phys_addr;
5448
5449                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5450                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5451                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5452                                 (uint32_t)(bus_addr >> 32));
5453                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5454                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5455                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5456                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5457
5458
5459                 /* Configure the SRRCTL register */
5460                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5461
5462                 /* Set if packets are dropped when no descriptors available */
5463                 if (rxq->drop_en)
5464                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5465
5466                 /*
5467                  * Configure the RX buffer size in the BSIZEPACKET field of
5468                  * the SRRCTL register of the queue.
5469                  * The value is in 1 KB resolution. Valid values can be from
5470                  * 1 KB to 16 KB.
5471                  */
5472                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5473                         RTE_PKTMBUF_HEADROOM);
5474                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5475                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5476
5477                 /*
5478                  * VF modification to write virtual function SRRCTL register
5479                  */
5480                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5481
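                /*
                 * Recompute the effective buffer size from the BSIZEPACKET
                 * field just programmed (1 KB granularity) for the scatter
                 * check below.
                 */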
5482                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5483                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5484
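                /*
                 * Force scattered Rx when scatter is requested or when a
                 * maximum-sized frame cannot fit into a single buffer.
                 */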
5485                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5486                     /* Allow for two VLAN tags (QinQ) on top of the max packet length */
5487                     (rxmode->max_rx_pkt_len +
5488                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5489                         if (!dev->data->scattered_rx)
5490                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5491                         dev->data->scattered_rx = 1;
5492                 }
5493
5494                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5495                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5496         }
5497
5498         /* Set RQPL for VF RSS according to the number of Rx queues */
5499         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5500                 IXGBE_PSRTYPE_RQPL_SHIFT;
5501         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5502
5503         ixgbe_set_rx_function(dev);
5504
5505         return 0;
5506 }
5507
5508 /*
5509  * [VF] Initializes Transmit Unit.
5510  */
5511 void __attribute__((cold))
5512 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5513 {
5514         struct ixgbe_hw     *hw;
5515         struct ixgbe_tx_queue *txq;
5516         uint64_t bus_addr;
5517         uint32_t txctrl;
5518         uint16_t i;
5519
5520         PMD_INIT_FUNC_TRACE();
5521         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5522
5523         /* Setup the Base and Length of the Tx Descriptor Rings */
5524         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5525                 txq = dev->data->tx_queues[i];
5526                 bus_addr = txq->tx_ring_phys_addr;
5527                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5528                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5529                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5530                                 (uint32_t)(bus_addr >> 32));
5531                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5532                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5533                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5534                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5535                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5536
5537                 /*
5538                  * Disable Tx Head Writeback RO bit, since this hoses
5539                  * bookkeeping if things aren't delivered in order.
5540                  */
5541                 txctrl = IXGBE_READ_REG(hw,
5542                                 IXGBE_VFDCA_TXCTRL(i));
5543                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5544                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5545                                 txctrl);
5546         }
5547 }
5548
5549 /*
5550  * [VF] Start Transmit and Receive Units.
5551  */
5552 void __attribute__((cold))
5553 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5554 {
5555         struct ixgbe_hw     *hw;
5556         struct ixgbe_tx_queue *txq;
5557         struct ixgbe_rx_queue *rxq;
5558         uint32_t txdctl;
5559         uint32_t rxdctl;
5560         uint16_t i;
5561         int poll_ms;
5562
5563         PMD_INIT_FUNC_TRACE();
5564         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5565
5566         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5567                 txq = dev->data->tx_queues[i];
5568                 /* Setup Transmit Threshold Registers */
5569                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5570                 txdctl |= txq->pthresh & 0x7F;
5571                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5572                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5573                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5574         }
5575
5576         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5577
5578                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5579                 txdctl |= IXGBE_TXDCTL_ENABLE;
5580                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5581
5582                 poll_ms = 10;
5583                 /* Wait until TX Enable ready */
5584                 do {
5585                         rte_delay_ms(1);
5586                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5587                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5588                 if (!poll_ms)
5589                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5590         }
5591         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5592
5593                 rxq = dev->data->rx_queues[i];
5594
5595                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5596                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5597                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5598
5599                 /* Wait until RX Enable ready */
5600                 poll_ms = 10;
5601                 do {
5602                         rte_delay_ms(1);
5603                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5604                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5605                 if (!poll_ms)
5606                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5607                 rte_wmb();
5608                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5609
5610         }
5611 }
5612
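/*
 * Copy an RSS action into driver-private storage so the flow rule does not
 * keep referencing application-owned key and queue memory.
 * Return 0 on success, -EINVAL if the key or queue list does not fit.
 */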
5613 int
5614 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5615                     const struct rte_flow_action_rss *in)
5616 {
5617         if (in->key_len > RTE_DIM(out->key) ||
5618             in->queue_num > RTE_DIM(out->queue))
5619                 return -EINVAL;
5620         out->conf = (struct rte_flow_action_rss){
5621                 .func = in->func,
5622                 .level = in->level,
5623                 .types = in->types,
5624                 .key_len = in->key_len,
5625                 .queue_num = in->queue_num,
5626                 .key = memcpy(out->key, in->key, in->key_len),
5627                 .queue = memcpy(out->queue, in->queue,
5628                                 sizeof(*in->queue) * in->queue_num),
5629         };
5630         return 0;
5631 }
5632
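/*
 * Return non-zero when the two RSS actions are identical: same hash
 * function, level, types, key and queue list.
 */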
5633 int
5634 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5635                       const struct rte_flow_action_rss *with)
5636 {
5637         return (comp->func == with->func &&
5638                 comp->level == with->level &&
5639                 comp->types == with->types &&
5640                 comp->key_len == with->key_len &&
5641                 comp->queue_num == with->queue_num &&
5642                 !memcmp(comp->key, with->key, with->key_len) &&
5643                 !memcmp(comp->queue, with->queue,
5644                         sizeof(*with->queue) * with->queue_num));
5645 }
5646
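/*
 * Apply (add) or remove an RSS flow rule: program the redirection table
 * from the rule's queue list, set the hash key and enabled protocols, and
 * remember the configuration so a later destroy request can be matched.
 */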
5647 int
5648 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5649                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5650 {
5651         struct ixgbe_hw *hw;
5652         uint32_t reta;
5653         uint16_t i;
5654         uint16_t j;
5655         uint16_t sp_reta_size;
5656         uint32_t reta_reg;
5657         struct rte_eth_rss_conf rss_conf = {
5658                 .rss_key = conf->conf.key_len ?
5659                         (void *)(uintptr_t)conf->conf.key : NULL,
5660                 .rss_key_len = conf->conf.key_len,
5661                 .rss_hf = conf->conf.types,
5662         };
5663         struct ixgbe_filter_info *filter_info =
5664                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5665
5666         PMD_INIT_FUNC_TRACE();
5667         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5668
5669         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5670
5671         if (!add) {
5672                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5673                                           &conf->conf)) {
5674                         ixgbe_rss_disable(dev);
5675                         memset(&filter_info->rss_info, 0,
5676                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5677                         return 0;
5678                 }
5679                 return -EINVAL;
5680         }
5681
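        /* Only one RSS flow rule can be active at a time. */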
5682         if (filter_info->rss_info.conf.queue_num)
5683                 return -EINVAL;
5684         /* Fill in redirection table
5685          * The byte-swap is needed because NIC registers are in
5686          * little-endian order.
5687          */
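        /*
         * Four 8-bit entries are packed into each 32-bit RETA register and
         * the rule's queues are assigned to entries in round-robin order.
         */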
5688         reta = 0;
5689         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5690                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5691
5692                 if (j == conf->conf.queue_num)
5693                         j = 0;
5694                 reta = (reta << 8) | conf->conf.queue[j];
5695                 if ((i & 3) == 3)
5696                         IXGBE_WRITE_REG(hw, reta_reg,
5697                                         rte_bswap32(reta));
5698         }
5699
5700         /* Configure the RSS key and the RSS protocols used to compute
5701          * the RSS hash of input packets.
5702          */
5703         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5704                 ixgbe_rss_disable(dev);
5705                 return 0;
5706         }
5707         if (rss_conf.rss_key == NULL)
5708                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5709         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5710
5711         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5712                 return -EINVAL;
5713
5714         return 0;
5715 }
5716
5717 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
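/* They simply report failure (or zero packets) so that the scalar Rx/Tx
 * paths are selected instead of the vector ones.
 */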
5718 __rte_weak int
5719 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5720 {
5721         return -1;
5722 }
5723
5724 __rte_weak uint16_t
5725 ixgbe_recv_pkts_vec(
5726         void __rte_unused *rx_queue,
5727         struct rte_mbuf __rte_unused **rx_pkts,
5728         uint16_t __rte_unused nb_pkts)
5729 {
5730         return 0;
5731 }
5732
5733 __rte_weak uint16_t
5734 ixgbe_recv_scattered_pkts_vec(
5735         void __rte_unused *rx_queue,
5736         struct rte_mbuf __rte_unused **rx_pkts,
5737         uint16_t __rte_unused nb_pkts)
5738 {
5739         return 0;
5740 }
5741
5742 __rte_weak int
5743 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5744 {
5745         return -1;
5746 }