[dpdk.git] drivers/net/ixgbe/ixgbe_rxtx.c @ commit 80afe83d7cc0215f8d89e6141202dec4c43e9a3f
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask indicating which offload flags require building a TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (                  \
63                 PKT_TX_OUTER_IPV6 |              \
64                 PKT_TX_OUTER_IPV4 |              \
65                 PKT_TX_IPV6 |                    \
66                 PKT_TX_IPV4 |                    \
67                 PKT_TX_VLAN_PKT |                \
68                 PKT_TX_IP_CKSUM |                \
69                 PKT_TX_L4_MASK |                 \
70                 PKT_TX_TCP_SEG |                 \
71                 PKT_TX_MACSEC |                  \
72                 PKT_TX_OUTER_IP_CKSUM |          \
73                 PKT_TX_SEC_OFFLOAD |     \
74                 IXGBE_TX_IEEE1588_TMST)
75
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
78
79 #if 1
80 #define RTE_PMD_USE_PREFETCH
81 #endif
82
83 #ifdef RTE_PMD_USE_PREFETCH
84 /*
85  * Prefetch a cache line into all cache levels.
86  */
87 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
88 #else
89 #define rte_ixgbe_prefetch(p)   do {} while (0)
90 #endif
91
92 /*********************************************************************
93  *
94  *  TX functions
95  *
96  **********************************************************************/
97
98 /*
99  * Check for descriptors with their DD bit set and free mbufs.
100  * Return the total number of buffers freed.
101  */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
104 {
105         struct ixgbe_tx_entry *txep;
106         uint32_t status;
107         int i, nb_free = 0;
108         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
109
110         /* check DD bit on threshold descriptor */
111         status = txq->tx_ring[txq->tx_next_dd].wb.status;
112         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
113                 return 0;
114
115         /*
116          * first buffer to free from S/W ring is at index
117          * tx_next_dd - (tx_rs_thresh-1)
118          */
119         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
120
121         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122                 /* free buffers one at a time */
123                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
124                 txep->mbuf = NULL;
125
126                 if (unlikely(m == NULL))
127                         continue;
128
129                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130                     (nb_free > 0 && m->pool != free[0]->pool)) {
131                         rte_mempool_put_bulk(free[0]->pool,
132                                              (void **)free, nb_free);
133                         nb_free = 0;
134                 }
135
136                 free[nb_free++] = m;
137         }
138
139         if (nb_free > 0)
140                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
141
142         /* buffers were freed, update counters */
143         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
145         if (txq->tx_next_dd >= txq->nb_tx_desc)
146                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
147
148         return txq->tx_rs_thresh;
149 }
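
/*
 * Worked example of the free scheme above (the numbers are illustrative
 * assumptions, not taken from this file): with nb_tx_desc = 512 and
 * tx_rs_thresh = 32, the DD bit is tested on the descriptor at
 * tx_next_dd; if it is set, the 32 mbufs at sw_ring indices
 * [tx_next_dd - 31 .. tx_next_dd] are returned to their mempool(s) in
 * bulk, nb_tx_free grows by 32 and tx_next_dd advances by 32, wrapping
 * back to tx_rs_thresh - 1 = 31 past the end of the ring.
 */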
150
151 /* Populate 4 descriptors with data from 4 mbufs */
152 static inline void
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
154 {
155         uint64_t buf_dma_addr;
156         uint32_t pkt_len;
157         int i;
158
159         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161                 pkt_len = (*pkts)->data_len;
162
163                 /* write data to descriptor */
164                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
165
166                 txdp->read.cmd_type_len =
167                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
168
169                 txdp->read.olinfo_status =
170                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
171
172                 rte_prefetch0(&(*pkts)->pool);
173         }
174 }
175
176 /* Populate 1 descriptor with data from 1 mbuf */
177 static inline void
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
179 {
180         uint64_t buf_dma_addr;
181         uint32_t pkt_len;
182
183         buf_dma_addr = rte_mbuf_data_iova(*pkts);
184         pkt_len = (*pkts)->data_len;
185
186         /* write data to descriptor */
187         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188         txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190         txdp->read.olinfo_status =
191                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192         rte_prefetch0(&(*pkts)->pool);
193 }
194
195 /*
196  * Fill H/W descriptor ring with mbuf data.
197  * Copy mbuf pointers to the S/W ring.
198  */
199 static inline void
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
201                       uint16_t nb_pkts)
202 {
203         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205         const int N_PER_LOOP = 4;
206         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207         int mainpart, leftover;
208         int i, j;
209
210         /*
211          * Process most of the packets in chunks of N pkts.  Any
212          * leftover packets will get processed one at a time.
213          */
214         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
216         for (i = 0; i < mainpart; i += N_PER_LOOP) {
217                 /* Copy N mbuf pointers to the S/W ring */
218                 for (j = 0; j < N_PER_LOOP; ++j) {
219                         (txep + i + j)->mbuf = *(pkts + i + j);
220                 }
221                 tx4(txdp + i, pkts + i);
222         }
223
224         if (unlikely(leftover > 0)) {
225                 for (i = 0; i < leftover; ++i) {
226                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227                         tx1(txdp + mainpart + i, pkts + mainpart + i);
228                 }
229         }
230 }
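
/*
 * Illustrative example of the mainpart/leftover split above (the packet
 * count is an assumption for the example): with nb_pkts = 11, mainpart
 * is 8 and leftover is 3, so two tx4() calls fill the first eight
 * descriptors and three tx1() calls fill the remaining three, all
 * relative to the current tx_tail.
 */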
231
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
234              uint16_t nb_pkts)
235 {
236         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
238         uint16_t n = 0;
239
240         /*
241          * Begin scanning the H/W ring for done descriptors when the
242          * number of available descriptors drops below tx_free_thresh.  For
243          * each done descriptor, free the associated buffer.
244          */
245         if (txq->nb_tx_free < txq->tx_free_thresh)
246                 ixgbe_tx_free_bufs(txq);
247
248         /* Only use descriptors that are available */
249         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250         if (unlikely(nb_pkts == 0))
251                 return 0;
252
253         /* Use exactly nb_pkts descriptors */
254         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
255
256         /*
257          * At this point, we know there are enough descriptors in the
258          * ring to transmit all the packets.  This assumes that each
259          * mbuf contains a single segment, and that no new offloads
260          * are expected, which would require a new context descriptor.
261          */
262
263         /*
264          * See if we're going to wrap-around. If so, handle the top
265          * of the descriptor ring first, then do the bottom.  If not,
266          * the processing looks just like the "bottom" part anyway...
267          */
268         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
271
272                 /*
273                  * We know that the last descriptor in the ring will need to
274                  * have its RS bit set because tx_rs_thresh has to be
275                  * a divisor of the ring size
276                  */
277                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
280
281                 txq->tx_tail = 0;
282         }
283
284         /* Fill H/W descriptor ring with mbuf data */
285         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
287
288         /*
289          * Determine if RS bit should be set
290          * This is what we actually want:
291          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292          * but instead of subtracting 1 and doing >=, we can just do
293          * greater than without subtracting.
294          */
295         if (txq->tx_tail > txq->tx_next_rs) {
296                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
299                                                 txq->tx_rs_thresh);
300                 if (txq->tx_next_rs >= txq->nb_tx_desc)
301                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302         }
303
304         /*
305          * Check for wrap-around. This would only happen if we used
306          * up to the last descriptor in the ring, no more, no less.
307          */
308         if (txq->tx_tail >= txq->nb_tx_desc)
309                 txq->tx_tail = 0;
310
311         /* update tail pointer */
312         rte_wmb();
313         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
314
315         return nb_pkts;
316 }
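
/*
 * Illustrative example of the wrap-around handling above (ring size and
 * tail position are assumptions for the example): with nb_tx_desc = 512,
 * tx_tail = 508 and nb_pkts = 8, the first fill writes n = 4 descriptors
 * (508..511), the RS bit is set on the last descriptor of the ring,
 * tx_tail resets to 0 and the remaining 4 packets land in descriptors
 * 0..3 before the tail register is updated once.
 */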
317
318 uint16_t
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
320                        uint16_t nb_pkts)
321 {
322         uint16_t nb_tx;
323
324         /* If the burst fits within TX_MAX_BURST pkts, transmit it in one call */
325         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
327
328         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
329         nb_tx = 0;
330         while (nb_pkts) {
331                 uint16_t ret, n;
332
333                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335                 nb_tx = (uint16_t)(nb_tx + ret);
336                 nb_pkts = (uint16_t)(nb_pkts - ret);
337                 if (ret < n)
338                         break;
339         }
340
341         return nb_tx;
342 }
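
/*
 * Usage sketch (illustrative, not part of the driver): applications do
 * not call ixgbe_xmit_pkts_simple() directly; it is installed as the
 * ethdev tx_pkt_burst callback when the simple TX path is selected and
 * is reached through the generic API, e.g.:
 *
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb);
 *     // sent may be smaller than nb if the ring runs out of
 *     // free descriptors
 */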
343
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN_PKT) {
388                 tx_offload_mask.vlan_tci |= ~0;
389         }
390
391         /* check if TCP segmentation is required for this packet */
392         if (ol_flags & PKT_TX_TCP_SEG) {
393                 /* implies IP cksum in IPv4 */
394                 if (ol_flags & PKT_TX_IP_CKSUM)
395                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
398                 else
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
402
403                 tx_offload_mask.l2_len |= ~0;
404                 tx_offload_mask.l3_len |= ~0;
405                 tx_offload_mask.l4_len |= ~0;
406                 tx_offload_mask.tso_segsz |= ~0;
407                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409         } else { /* no TSO, check if hardware checksum is needed */
410                 if (ol_flags & PKT_TX_IP_CKSUM) {
411                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                 }
415
416                 switch (ol_flags & PKT_TX_L4_MASK) {
417                 case PKT_TX_UDP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
430                         tx_offload_mask.l2_len |= ~0;
431                         tx_offload_mask.l3_len |= ~0;
432                         break;
433                 case PKT_TX_SCTP_CKSUM:
434                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
438                         tx_offload_mask.l2_len |= ~0;
439                         tx_offload_mask.l3_len |= ~0;
440                         break;
441                 default:
442                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
444                         break;
445                 }
446         }
447
448         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449                 tx_offload_mask.outer_l2_len |= ~0;
450                 tx_offload_mask.outer_l3_len |= ~0;
451                 tx_offload_mask.l2_len |= ~0;
452                 seqnum_seed |= tx_offload.outer_l3_len
453                                << IXGBE_ADVTXD_OUTER_IPLEN;
454                 seqnum_seed |= tx_offload.l2_len
455                                << IXGBE_ADVTXD_TUNNEL_LEN;
456         }
457 #ifdef RTE_LIB_SECURITY
458         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459                 union ixgbe_crypto_tx_desc_md *md =
460                                 (union ixgbe_crypto_tx_desc_md *)mdata;
461                 seqnum_seed |=
462                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463                 type_tucmd_mlhl |= md->enc ?
464                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
466                 type_tucmd_mlhl |=
467                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468                 tx_offload_mask.sa_idx |= ~0;
469                 tx_offload_mask.sec_pad_len |= ~0;
470         }
471 #endif
472
473         txq->ctx_cache[ctx_idx].flags = ol_flags;
474         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
475                 tx_offload_mask.data[0] & tx_offload.data[0];
476         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
477                 tx_offload_mask.data[1] & tx_offload.data[1];
478         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
479
480         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481         vlan_macip_lens = tx_offload.l3_len;
482         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484                                     IXGBE_ADVTXD_MACLEN_SHIFT);
485         else
486                 vlan_macip_lens |= (tx_offload.l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
491         ctx_txd->seqnum_seed     = seqnum_seed;
492 }
493
494 /*
495  * Check which hardware context can be used. Use the existing match
496  * or create a new context descriptor.
497  */
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500                    union ixgbe_tx_offload tx_offload)
501 {
502         /* Check if it matches the currently used context */
503         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506                      & tx_offload.data[0])) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509                      & tx_offload.data[1]))))
510                 return txq->ctx_curr;
511
512         /* Otherwise check if it matches the other (next) context */
513         txq->ctx_curr ^= 1;
514         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517                      & tx_offload.data[0])) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520                      & tx_offload.data[1]))))
521                 return txq->ctx_curr;
522
523         /* No match: a new context descriptor must be built */
524         return IXGBE_CTX_NUM;
525 }
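
/*
 * Note (added for clarity): the driver caches the flags/offload values
 * last programmed into each of the IXGBE_CTX_NUM (two) per-queue
 * context slots. what_advctx_update() returns the index of a matching
 * slot if one exists; returning IXGBE_CTX_NUM tells the caller that
 * neither slot matches, so a new context descriptor must be written
 * into the slot selected by ctx_curr.
 */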
526
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
529 {
530         uint32_t tmp = 0;
531
532         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534         if (ol_flags & PKT_TX_IP_CKSUM)
535                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
536         if (ol_flags & PKT_TX_TCP_SEG)
537                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
538         return tmp;
539 }
540
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
543 {
544         uint32_t cmdtype = 0;
545
546         if (ol_flags & PKT_TX_VLAN_PKT)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548         if (ol_flags & PKT_TX_TCP_SEG)
549                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552         if (ol_flags & PKT_TX_MACSEC)
553                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
554         return cmdtype;
555 }
556
557 /* Default RS bit threshold values */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH   32
560 #endif
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
563 #endif
564
565 /* Reset transmit descriptors after they have been used */
566 static inline int
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
568 {
569         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572         uint16_t nb_tx_desc = txq->nb_tx_desc;
573         uint16_t desc_to_clean_to;
574         uint16_t nb_tx_to_clean;
575         uint32_t status;
576
577         /* Determine the last descriptor needing to be cleaned */
578         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579         if (desc_to_clean_to >= nb_tx_desc)
580                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
581
582         /* Check to make sure the last descriptor to clean is done */
583         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584         status = txr[desc_to_clean_to].wb.status;
585         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
586                 PMD_TX_LOG(DEBUG,
587                            "TX descriptor %4u is not done "
588                            "(port=%d queue=%d)",
589                            desc_to_clean_to,
590                            txq->port_id, txq->queue_id);
591                 /* Failed to clean any descriptors, better luck next time */
592                 return -(1);
593         }
594
595         /* Figure out how many descriptors will be cleaned */
596         if (last_desc_cleaned > desc_to_clean_to)
597                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
598                                                         desc_to_clean_to);
599         else
600                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
601                                                 last_desc_cleaned);
602
603         PMD_TX_LOG(DEBUG,
604                    "Cleaning %4u TX descriptors: %4u to %4u "
605                    "(port=%d queue=%d)",
606                    nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607                    txq->port_id, txq->queue_id);
608
609         /*
610          * The last descriptor to clean is done, so that means all the
611          * descriptors from the last descriptor that was cleaned
612          * up to the last descriptor with the RS bit set
613          * are done. Only reset the threshold descriptor.
614          */
615         txr[desc_to_clean_to].wb.status = 0;
616
617         /* Update the txq to reflect the last descriptor that was cleaned */
618         txq->last_desc_cleaned = desc_to_clean_to;
619         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
620
621         /* No Error */
622         return 0;
623 }
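
/*
 * Illustrative example of the cleanup arithmetic above (values are
 * assumptions for the example): with nb_tx_desc = 512, tx_rs_thresh = 32
 * and last_desc_cleaned = 500, desc_to_clean_to first becomes
 * 532 - 512 = 20 and is then adjusted via sw_ring[20].last_id; assuming
 * it stays 20 and its DD bit is set, nb_tx_to_clean is
 * (512 - 500) + 20 = 32 and nb_tx_free grows by that amount.
 */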
624
625 uint16_t
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
627                 uint16_t nb_pkts)
628 {
629         struct ixgbe_tx_queue *txq;
630         struct ixgbe_tx_entry *sw_ring;
631         struct ixgbe_tx_entry *txe, *txn;
632         volatile union ixgbe_adv_tx_desc *txr;
633         volatile union ixgbe_adv_tx_desc *txd, *txp;
634         struct rte_mbuf     *tx_pkt;
635         struct rte_mbuf     *m_seg;
636         uint64_t buf_dma_addr;
637         uint32_t olinfo_status;
638         uint32_t cmd_type_len;
639         uint32_t pkt_len;
640         uint16_t slen;
641         uint64_t ol_flags;
642         uint16_t tx_id;
643         uint16_t tx_last;
644         uint16_t nb_tx;
645         uint16_t nb_used;
646         uint64_t tx_ol_req;
647         uint32_t ctx = 0;
648         uint32_t new_ctx;
649         union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIB_SECURITY
651         uint8_t use_ipsec;
652 #endif
653
654         tx_offload.data[0] = 0;
655         tx_offload.data[1] = 0;
656         txq = tx_queue;
657         sw_ring = txq->sw_ring;
658         txr     = txq->tx_ring;
659         tx_id   = txq->tx_tail;
660         txe = &sw_ring[tx_id];
661         txp = NULL;
662
663         /* Determine if the descriptor ring needs to be cleaned. */
664         if (txq->nb_tx_free < txq->tx_free_thresh)
665                 ixgbe_xmit_cleanup(txq);
666
667         rte_prefetch0(&txe->mbuf->pool);
668
669         /* TX loop */
670         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
671                 new_ctx = 0;
672                 tx_pkt = *tx_pkts++;
673                 pkt_len = tx_pkt->pkt_len;
674
675                 /*
676                  * Determine how many (if any) context descriptors
677                  * are needed for offload functionality.
678                  */
679                 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIB_SECURITY
681                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
682 #endif
683
684                 /* If hardware offload required */
685                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
686                 if (tx_ol_req) {
687                         tx_offload.l2_len = tx_pkt->l2_len;
688                         tx_offload.l3_len = tx_pkt->l3_len;
689                         tx_offload.l4_len = tx_pkt->l4_len;
690                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
691                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
692                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIB_SECURITY
695                         if (use_ipsec) {
696                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697                                         (union ixgbe_crypto_tx_desc_md *)
698                                                 rte_security_dynfield(tx_pkt);
699                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
701                         }
702 #endif
703
704                         /* Check whether a new context must be built or an existing one reused. */
705                         ctx = what_advctx_update(txq, tx_ol_req,
706                                 tx_offload);
707                         /* Only allocate a context descriptor if required */
708                         new_ctx = (ctx == IXGBE_CTX_NUM);
709                         ctx = txq->ctx_curr;
710                 }
711
712                 /*
713                  * Keep track of how many descriptors are used this loop.
714                  * This will always be the number of segments plus the number
715                  * of context descriptors required to transmit the packet.
716                  */
717                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
718
719                 if (txp != NULL &&
720                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721                         /* set RS on the previous packet in the burst */
722                         txp->read.cmd_type_len |=
723                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
724
725                 /*
726                  * The number of descriptors that must be allocated for a
727                  * packet is the number of segments of that packet, plus 1
728                  * Context Descriptor for the hardware offload, if any.
729                  * Determine the last TX descriptor to allocate in the TX ring
730                  * for the packet, starting from the current position (tx_id)
731                  * in the ring.
732                  */
733                 tx_last = (uint16_t) (tx_id + nb_used - 1);
734
735                 /* Circular ring */
736                 if (tx_last >= txq->nb_tx_desc)
737                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
738
739                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740                            " tx_first=%u tx_last=%u",
741                            (unsigned) txq->port_id,
742                            (unsigned) txq->queue_id,
743                            (unsigned) pkt_len,
744                            (unsigned) tx_id,
745                            (unsigned) tx_last);
746
747                 /*
748                  * Make sure there are enough TX descriptors available to
749                  * transmit the entire packet.
750                  * nb_used better be less than or equal to txq->tx_rs_thresh
751                  */
752                 if (nb_used > txq->nb_tx_free) {
753                         PMD_TX_LOG(DEBUG,
754                                    "Not enough free TX descriptors "
755                                    "nb_used=%4u nb_free=%4u "
756                                    "(port=%d queue=%d)",
757                                    nb_used, txq->nb_tx_free,
758                                    txq->port_id, txq->queue_id);
759
760                         if (ixgbe_xmit_cleanup(txq) != 0) {
761                                 /* Could not clean any descriptors */
762                                 if (nb_tx == 0)
763                                         return 0;
764                                 goto end_of_tx;
765                         }
766
767                         /* nb_used better be <= txq->tx_rs_thresh */
768                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
769                                 PMD_TX_LOG(DEBUG,
770                                            "The number of descriptors needed to "
771                                            "transmit the packet exceeds the "
772                                            "RS bit threshold. This will impact "
773                                            "performance. "
774                                            "nb_used=%4u nb_free=%4u "
775                                            "tx_rs_thresh=%4u. "
776                                            "(port=%d queue=%d)",
777                                            nb_used, txq->nb_tx_free,
778                                            txq->tx_rs_thresh,
779                                            txq->port_id, txq->queue_id);
780                                 /*
781                                  * Loop here until there are enough TX
782                                  * descriptors or until the ring cannot be
783                                  * cleaned.
784                                  */
785                                 while (nb_used > txq->nb_tx_free) {
786                                         if (ixgbe_xmit_cleanup(txq) != 0) {
787                                                 /*
788                                                  * Could not clean any
789                                                  * descriptors
790                                                  */
791                                                 if (nb_tx == 0)
792                                                         return 0;
793                                                 goto end_of_tx;
794                                         }
795                                 }
796                         }
797                 }
798
799                 /*
800                  * By now there are enough free TX descriptors to transmit
801                  * the packet.
802                  */
803
804                 /*
805                  * Set common flags of all TX Data Descriptors.
806                  *
807                  * The following bits must be set in all Data Descriptors:
808                  *   - IXGBE_ADVTXD_DTYP_DATA
809                  *   - IXGBE_ADVTXD_DCMD_DEXT
810                  *
811                  * The following bits must be set in the first Data Descriptor
812                  * and are ignored in the other ones:
813                  *   - IXGBE_ADVTXD_DCMD_IFCS
814                  *   - IXGBE_ADVTXD_MAC_1588
815                  *   - IXGBE_ADVTXD_DCMD_VLE
816                  *
817                  * The following bits must only be set in the last Data
818                  * Descriptor:
819                  *   - IXGBE_TXD_CMD_EOP
820                  *
821                  * The following bits can be set in any Data Descriptor, but
822                  * are only set in the last Data Descriptor:
823                  *   - IXGBE_TXD_CMD_RS
824                  */
825                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
827
828 #ifdef RTE_LIBRTE_IEEE1588
829                 if (ol_flags & PKT_TX_IEEE1588_TMST)
830                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
831 #endif
832
833                 olinfo_status = 0;
834                 if (tx_ol_req) {
835
836                         if (ol_flags & PKT_TX_TCP_SEG) {
837                                 /* when TSO is on, the paylen in the descriptor is
838                                  * not the packet len but the TCP payload len */
839                                 pkt_len -= (tx_offload.l2_len +
840                                         tx_offload.l3_len + tx_offload.l4_len);
841                         }
842
843                         /*
844                          * Setup the TX Advanced Context Descriptor if required
845                          */
846                         if (new_ctx) {
847                                 volatile struct ixgbe_adv_tx_context_desc *
848                                     ctx_txd;
849
850                                 ctx_txd = (volatile struct
851                                     ixgbe_adv_tx_context_desc *)
852                                     &txr[tx_id];
853
854                                 txn = &sw_ring[txe->next_id];
855                                 rte_prefetch0(&txn->mbuf->pool);
856
857                                 if (txe->mbuf != NULL) {
858                                         rte_pktmbuf_free_seg(txe->mbuf);
859                                         txe->mbuf = NULL;
860                                 }
861
862                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
863                                         tx_offload,
864                                         rte_security_dynfield(tx_pkt));
865
866                                 txe->last_id = tx_last;
867                                 tx_id = txe->next_id;
868                                 txe = txn;
869                         }
870
871                         /*
872                          * Setup the TX Advanced Data Descriptor,
873                          * This path will go through
874                          * whatever new/reuse the context descriptor
875                          */
876                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
877                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
878                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
879                 }
880
881                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
882 #ifdef RTE_LIB_SECURITY
883                 if (use_ipsec)
884                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
885 #endif
886
887                 m_seg = tx_pkt;
888                 do {
889                         txd = &txr[tx_id];
890                         txn = &sw_ring[txe->next_id];
891                         rte_prefetch0(&txn->mbuf->pool);
892
893                         if (txe->mbuf != NULL)
894                                 rte_pktmbuf_free_seg(txe->mbuf);
895                         txe->mbuf = m_seg;
896
897                         /*
898                          * Set up Transmit Data Descriptor.
899                          */
900                         slen = m_seg->data_len;
901                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
902                         txd->read.buffer_addr =
903                                 rte_cpu_to_le_64(buf_dma_addr);
904                         txd->read.cmd_type_len =
905                                 rte_cpu_to_le_32(cmd_type_len | slen);
906                         txd->read.olinfo_status =
907                                 rte_cpu_to_le_32(olinfo_status);
908                         txe->last_id = tx_last;
909                         tx_id = txe->next_id;
910                         txe = txn;
911                         m_seg = m_seg->next;
912                 } while (m_seg != NULL);
913
914                 /*
915                  * The last packet data descriptor needs End Of Packet (EOP)
916                  */
917                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
918                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
919                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
920
921                 /* Set RS bit only on threshold packets' last descriptor */
922                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
923                         PMD_TX_LOG(DEBUG,
924                                    "Setting RS bit on TXD id="
925                                    "%4u (port=%d queue=%d)",
926                                    tx_last, txq->port_id, txq->queue_id);
927
928                         cmd_type_len |= IXGBE_TXD_CMD_RS;
929
930                         /* Update txq RS bit counters */
931                         txq->nb_tx_used = 0;
932                         txp = NULL;
933                 } else
934                         txp = txd;
935
936                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
937         }
938
939 end_of_tx:
940         /* set RS on last packet in the burst */
941         if (txp != NULL)
942                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
943
944         rte_wmb();
945
946         /*
947          * Set the Transmit Descriptor Tail (TDT)
948          */
949         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
950                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
951                    (unsigned) tx_id, (unsigned) nb_tx);
952         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
953         txq->tx_tail = tx_id;
954
955         return nb_tx;
956 }
957
958 /*********************************************************************
959  *
960  *  TX prep functions
961  *
962  **********************************************************************/
963 uint16_t
964 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
965 {
966         int i, ret;
967         uint64_t ol_flags;
968         struct rte_mbuf *m;
969         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
970
971         for (i = 0; i < nb_pkts; i++) {
972                 m = tx_pkts[i];
973                 ol_flags = m->ol_flags;
974
975                 /**
976                  * Check if packet meets requirements for number of segments
977                  *
978                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
979                  *       non-TSO
980                  */
981
982                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
983                         rte_errno = EINVAL;
984                         return i;
985                 }
986
987                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
988                         rte_errno = ENOTSUP;
989                         return i;
990                 }
991
992                 /* check the size of packet */
993                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
994                         rte_errno = EINVAL;
995                         return i;
996                 }
997
998 #ifdef RTE_ETHDEV_DEBUG_TX
999                 ret = rte_validate_tx_offload(m);
1000                 if (ret != 0) {
1001                         rte_errno = -ret;
1002                         return i;
1003                 }
1004 #endif
1005                 ret = rte_net_intel_cksum_prepare(m);
1006                 if (ret != 0) {
1007                         rte_errno = -ret;
1008                         return i;
1009                 }
1010         }
1011
1012         return i;
1013 }
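
/*
 * Usage sketch (illustrative only): this function backs the ethdev
 * tx_pkt_prepare callback, so an application that relies on TSO or
 * checksum offloads would typically call it through the generic API
 * before transmitting, e.g.:
 *
 *     uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *     // on failure, nb_ok < nb and rte_errno explains why
 *     // pkts[nb_ok] was rejected
 *     uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
 */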
1014
1015 /*********************************************************************
1016  *
1017  *  RX functions
1018  *
1019  **********************************************************************/
1020
1021 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1022 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1023 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1024 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1025 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1030 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1031 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1032 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1033 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1037 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1054
1055 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1078
1079 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1102
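/*
 * Note on the IXGBE_PACKET_TYPE_* values above (derived from the defines
 * themselves): the hex codes are bit-wise combinations used to index the
 * tables below: 0x01 IPv4, 0x02 IPv4 ext, 0x04 IPv6, 0x08 IPv6 ext,
 * 0x10 TCP, 0x20 UDP, 0x40 SCTP, and 0x80 marks the VXLAN entries in the
 * tunnel table; e.g. IXGBE_PACKET_TYPE_IPV4_TCP is 0x01 | 0x10 = 0x11.
 */
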
1103 /**
1104  * Use 2 different tables for normal packets and tunnel packets
1105  * to save space.
1106  */
1107 const uint32_t
1108         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1109         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1110         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4,
1112         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1114         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1116         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1124         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1126         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6,
1128         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1130         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1132         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1140         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1151         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1163         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1175         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1187         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1188                 RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1191 };
1192
1193 const uint32_t
1194         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1195         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1216         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1217                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1219                 RTE_PTYPE_INNER_L4_TCP,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1223                 RTE_PTYPE_INNER_L4_TCP,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1230                 RTE_PTYPE_INNER_L4_TCP,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1232                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1233                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1234                 RTE_PTYPE_INNER_L3_IPV4,
1235         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1236                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1238                 RTE_PTYPE_INNER_L4_UDP,
1239         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1242                 RTE_PTYPE_INNER_L4_UDP,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1246                 RTE_PTYPE_INNER_L4_SCTP,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1253                 RTE_PTYPE_INNER_L4_UDP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1255                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1257                 RTE_PTYPE_INNER_L4_SCTP,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1259                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1260                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1261                 RTE_PTYPE_INNER_L3_IPV4,
1262         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1263                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1265                 RTE_PTYPE_INNER_L4_SCTP,
1266         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                 RTE_PTYPE_INNER_L4_SCTP,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1273                 RTE_PTYPE_INNER_L4_TCP,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1277                 RTE_PTYPE_INNER_L4_UDP,
1278
1279         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV6,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV4,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV4,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1323                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1324                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1325                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1351                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1352                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1353                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1370 };
1371
1372 static int
1373 ixgbe_monitor_callback(const uint64_t value,
1374                 const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
1375 {
1376         const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1377         /*
1378          * we expect the DD bit to be set to 1 if this descriptor was already
1379          * written to.
1380          */
1381         return (value & m) == m ? -1 : 0;
1382 }
1383
1384 int
1385 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1386 {
1387         volatile union ixgbe_adv_rx_desc *rxdp;
1388         struct ixgbe_rx_queue *rxq = rx_queue;
1389         uint16_t desc;
1390
1391         desc = rxq->rx_tail;
1392         rxdp = &rxq->rx_ring[desc];
1393         /* watch for changes in status bit */
1394         pmc->addr = &rxdp->wb.upper.status_error;
1395
1396         /* comparison callback */
1397         pmc->fn = ixgbe_monitor_callback;
1398
1399         /* the registers are 32-bit */
1400         pmc->size = sizeof(uint32_t);
1401
1402         return 0;
1403 }
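
/*
 * Illustrative sketch, not part of the upstream driver: one way an
 * application could consume the monitor condition filled in by
 * ixgbe_get_monitor_addr() above, assuming the generic
 * rte_eth_get_monitor_addr()/rte_power_monitor() API of this DPDK version
 * is available.  The function name and the ~200us timeout are illustrative
 * assumptions only.
 */
static __rte_unused void
ixgbe_doc_example_rx_wait(uint16_t port_id, uint16_t queue_id)
{
        struct rte_power_monitor_cond pmc;

        /* ask the PMD to describe the descriptor word to monitor */
        if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) != 0)
                return;

        /* sleep until the DD bit is observed or roughly 200us elapse */
        rte_power_monitor(&pmc, rte_get_tsc_cycles() + rte_get_tsc_hz() / 5000);
}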
1404
1405 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1406 static inline uint32_t
1407 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1408 {
1409
1410         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1411                 return RTE_PTYPE_UNKNOWN;
1412
1413         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1414
1415         /* For tunnel packet */
1416         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1417                 /* Remove the tunnel bit to save space. */
1418                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1419                 return ptype_table_tn[pkt_info];
1420         }
1421
1422         /**
1423          * For x550, if the packet is not a tunnel packet,
1424          * the tunnel type bit should be set to 0.
1425          * Reuse the 82599 mask.
1426          */
1427         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1428
1429         return ptype_table[pkt_info];
1430 }
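
/*
 * Illustrative sketch, not part of the upstream driver: the packet_type
 * value produced by ixgbe_rxd_pkt_info_to_pkt_type() above is a plain
 * RTE_PTYPE_* bit-field, so applications can classify packets with the
 * generic masks from rte_mbuf_ptype.h.  The helper name is an illustrative
 * assumption.
 */
static __rte_unused int
ixgbe_doc_example_is_tunnel_tcp(const struct rte_mbuf *m)
{
        /* tunnel bit comes from the ptype_table_tn[] entries above */
        if ((m->packet_type & RTE_PTYPE_TUNNEL_MASK) == 0)
                return 0;

        /* inner L4 protocol recovered from the same lookup */
        return (m->packet_type & RTE_PTYPE_INNER_L4_MASK) ==
                        RTE_PTYPE_INNER_L4_TCP;
}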
1431
1432 static inline uint64_t
1433 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1434 {
1435         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1436                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1437                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1438                 PKT_RX_RSS_HASH, 0, 0, 0,
1439                 0, 0, 0,  PKT_RX_FDIR,
1440         };
1441 #ifdef RTE_LIBRTE_IEEE1588
1442         static uint64_t ip_pkt_etqf_map[8] = {
1443                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1444                 0, 0, 0, 0,
1445         };
1446
1447         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1448                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1449                                 ip_rss_types_map[pkt_info & 0XF];
1450         else
1451                 return ip_rss_types_map[pkt_info & 0XF];
1452 #else
1453         return ip_rss_types_map[pkt_info & 0XF];
1454 #endif
1455 }
1456
1457 static inline uint64_t
1458 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1459 {
1460         uint64_t pkt_flags;
1461
1462         /*
1463          * Check only whether a VLAN is present.
1464          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1465          * that can be found from the rte_eth_rxmode.offloads flags.
1466          */
1467         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1468
1469 #ifdef RTE_LIBRTE_IEEE1588
1470         if (rx_status & IXGBE_RXD_STAT_TMST)
1471                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1472 #endif
1473         return pkt_flags;
1474 }
1475
1476 static inline uint64_t
1477 rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1478                            uint8_t rx_udp_csum_zero_err)
1479 {
1480         uint64_t pkt_flags;
1481
1482         /*
1483          * Bit 31: IPE, IPv4 checksum error
1484          * Bit 30: L4I, L4 integrity error
1485          */
1486         static uint64_t error_to_pkt_flags_map[4] = {
1487                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1488                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1489                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1490                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1491         };
1492         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1493                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1494
1495         /* Mask out the bad UDP checksum error if the hardware has the UDP
1496          * zero checksum error issue, so that the application will recompute
1497          * the checksum itself if needed.
1498          */
1499         if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1500             (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1501             rx_udp_csum_zero_err)
1502                 pkt_flags &= ~PKT_RX_L4_CKSUM_BAD;
1503
1504         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1505             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1506                 pkt_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
1507         }
1508
1509 #ifdef RTE_LIB_SECURITY
1510         if (rx_status & IXGBE_RXD_STAT_SECP) {
1511                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1512                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1513                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1514         }
1515 #endif
1516
1517         return pkt_flags;
1518 }
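
/*
 * Illustrative sketch, not part of the upstream driver: the checksum bits
 * set by rx_desc_error_to_pkt_flags() above are consumed through the
 * generic PKT_RX_*_CKSUM_* masks.  The helper name is an illustrative
 * assumption; after the UDP zero-checksum workaround above, an L4 status
 * that is neither GOOD nor BAD typically means software should verify the
 * checksum itself if it cares.
 */
static __rte_unused int
ixgbe_doc_example_need_sw_l4_csum(const struct rte_mbuf *m)
{
        const uint64_t l4 = m->ol_flags & PKT_RX_L4_CKSUM_MASK;

        return l4 != PKT_RX_L4_CKSUM_GOOD && l4 != PKT_RX_L4_CKSUM_BAD;
}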
1519
1520 /*
1521  * LOOK_AHEAD defines how many desc statuses to check beyond the
1522  * current descriptor.
1523  * It must be a pound define for optimal performance.
1524  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1525  * function only works with LOOK_AHEAD=8.
1526  */
1527 #define LOOK_AHEAD 8
1528 #if (LOOK_AHEAD != 8)
1529 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1530 #endif
1531 static inline int
1532 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1533 {
1534         volatile union ixgbe_adv_rx_desc *rxdp;
1535         struct ixgbe_rx_entry *rxep;
1536         struct rte_mbuf *mb;
1537         uint16_t pkt_len;
1538         uint64_t pkt_flags;
1539         int nb_dd;
1540         uint32_t s[LOOK_AHEAD];
1541         uint32_t pkt_info[LOOK_AHEAD];
1542         int i, j, nb_rx = 0;
1543         uint32_t status;
1544         uint64_t vlan_flags = rxq->vlan_flags;
1545
1546         /* get references to current descriptor and S/W ring entry */
1547         rxdp = &rxq->rx_ring[rxq->rx_tail];
1548         rxep = &rxq->sw_ring[rxq->rx_tail];
1549
1550         status = rxdp->wb.upper.status_error;
1551         /* check to make sure there is at least 1 packet to receive */
1552         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1553                 return 0;
1554
1555         /*
1556          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1557          * reference packets that are ready to be received.
1558          */
1559         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1560              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1561                 /* Read desc statuses backwards to avoid race condition */
1562                 for (j = 0; j < LOOK_AHEAD; j++)
1563                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1564
1565                 rte_smp_rmb();
1566
1567                 /* Compute how many status bits were set */
1568                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1569                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1570                         ;
1571
1572                 for (j = 0; j < nb_dd; j++)
1573                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1574                                                        lo_dword.data);
1575
1576                 nb_rx += nb_dd;
1577
1578                 /* Translate descriptor info to mbuf format */
1579                 for (j = 0; j < nb_dd; ++j) {
1580                         mb = rxep[j].mbuf;
1581                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1582                                   rxq->crc_len;
1583                         mb->data_len = pkt_len;
1584                         mb->pkt_len = pkt_len;
1585                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1586
1587                         /* convert descriptor fields to rte mbuf flags */
1588                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1589                                 vlan_flags);
1590                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
1591                                         (uint16_t)pkt_info[j],
1592                                         rxq->rx_udp_csum_zero_err);
1593                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1594                                         ((uint16_t)pkt_info[j]);
1595                         mb->ol_flags = pkt_flags;
1596                         mb->packet_type =
1597                                 ixgbe_rxd_pkt_info_to_pkt_type
1598                                         (pkt_info[j], rxq->pkt_type_mask);
1599
1600                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1601                                 mb->hash.rss = rte_le_to_cpu_32(
1602                                     rxdp[j].wb.lower.hi_dword.rss);
1603                         else if (pkt_flags & PKT_RX_FDIR) {
1604                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1605                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1606                                     IXGBE_ATR_HASH_MASK;
1607                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1608                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1609                         }
1610                 }
1611
1612                 /* Move mbuf pointers from the S/W ring to the stage */
1613                 for (j = 0; j < LOOK_AHEAD; ++j) {
1614                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1615                 }
1616
1617                 /* stop if this group of LOOK_AHEAD descriptors was not fully done */
1618                 if (nb_dd != LOOK_AHEAD)
1619                         break;
1620         }
1621
1622         /* clear software ring entries so we can cleanup correctly */
1623         for (i = 0; i < nb_rx; ++i) {
1624                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1625         }
1626
1627
1628         return nb_rx;
1629 }
1630
1631 static inline int
1632 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1633 {
1634         volatile union ixgbe_adv_rx_desc *rxdp;
1635         struct ixgbe_rx_entry *rxep;
1636         struct rte_mbuf *mb;
1637         uint16_t alloc_idx;
1638         __le64 dma_addr;
1639         int diag, i;
1640
1641         /* allocate buffers in bulk directly into the S/W ring */
1642         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1643         rxep = &rxq->sw_ring[alloc_idx];
1644         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1645                                     rxq->rx_free_thresh);
1646         if (unlikely(diag != 0))
1647                 return -ENOMEM;
1648
1649         rxdp = &rxq->rx_ring[alloc_idx];
1650         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1651                 /* populate the static rte mbuf fields */
1652                 mb = rxep[i].mbuf;
1653                 if (reset_mbuf) {
1654                         mb->port = rxq->port_id;
1655                 }
1656
1657                 rte_mbuf_refcnt_set(mb, 1);
1658                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1659
1660                 /* populate the descriptors */
1661                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1662                 rxdp[i].read.hdr_addr = 0;
1663                 rxdp[i].read.pkt_addr = dma_addr;
1664         }
1665
1666         /* update state of internal queue structure */
1667         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1668         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1669                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1670
1671         /* no errors */
1672         return 0;
1673 }
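
/*
 * Illustrative sketch, not part of the upstream driver: the rx_free_thresh
 * replenish granularity used by ixgbe_rx_alloc_bufs() above is chosen by
 * the application at queue-setup time.  The descriptor count, threshold
 * value and helper name below are illustrative assumptions.
 */
static __rte_unused int
ixgbe_doc_example_setup_rxq(uint16_t port_id, uint16_t queue_id,
                            struct rte_mempool *mp)
{
        struct rte_eth_dev_info info;
        struct rte_eth_rxconf rxconf;
        int ret;

        ret = rte_eth_dev_info_get(port_id, &info);
        if (ret != 0)
                return ret;

        rxconf = info.default_rxconf;
        rxconf.rx_free_thresh = 32; /* replenish 32 descriptors at a time */

        return rte_eth_rx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id),
                                      &rxconf, mp);
}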
1674
1675 static inline uint16_t
1676 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1677                          uint16_t nb_pkts)
1678 {
1679         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1680         int i;
1681
1682         /* how many packets are ready to return? */
1683         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1684
1685         /* copy mbuf pointers to the application's packet list */
1686         for (i = 0; i < nb_pkts; ++i)
1687                 rx_pkts[i] = stage[i];
1688
1689         /* update internal queue state */
1690         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1691         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1692
1693         return nb_pkts;
1694 }
1695
1696 static inline uint16_t
1697 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1698              uint16_t nb_pkts)
1699 {
1700         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1701         uint16_t nb_rx = 0;
1702
1703         /* Any previously recv'd pkts will be returned from the Rx stage */
1704         if (rxq->rx_nb_avail)
1705                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1706
1707         /* Scan the H/W ring for packets to receive */
1708         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1709
1710         /* update internal queue state */
1711         rxq->rx_next_avail = 0;
1712         rxq->rx_nb_avail = nb_rx;
1713         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1714
1715         /* if required, allocate new buffers to replenish descriptors */
1716         if (rxq->rx_tail > rxq->rx_free_trigger) {
1717                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1718
1719                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1720                         int i, j;
1721
1722                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1723                                    "queue_id=%u", (unsigned) rxq->port_id,
1724                                    (unsigned) rxq->queue_id);
1725
1726                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1727                                 rxq->rx_free_thresh;
1728
1729                         /*
1730                          * Need to rewind any previous receives if we cannot
1731                          * allocate new buffers to replenish the old ones.
1732                          */
1733                         rxq->rx_nb_avail = 0;
1734                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1735                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1736                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1737
1738                         return 0;
1739                 }
1740
1741                 /* update tail pointer */
1742                 rte_wmb();
1743                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1744                                             cur_free_trigger);
1745         }
1746
1747         if (rxq->rx_tail >= rxq->nb_rx_desc)
1748                 rxq->rx_tail = 0;
1749
1750         /* received any packets this loop? */
1751         if (rxq->rx_nb_avail)
1752                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1753
1754         return 0;
1755 }
1756
1757 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1758 uint16_t
1759 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1760                            uint16_t nb_pkts)
1761 {
1762         uint16_t nb_rx;
1763
1764         if (unlikely(nb_pkts == 0))
1765                 return 0;
1766
1767         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1768                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1769
1770         /* request is relatively large, chunk it up */
1771         nb_rx = 0;
1772         while (nb_pkts) {
1773                 uint16_t ret, n;
1774
1775                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1776                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1777                 nb_rx = (uint16_t)(nb_rx + ret);
1778                 nb_pkts = (uint16_t)(nb_pkts - ret);
1779                 if (ret < n)
1780                         break;
1781         }
1782
1783         return nb_rx;
1784 }
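
/*
 * Illustrative sketch, not part of the upstream driver: applications reach
 * ixgbe_recv_pkts_bulk_alloc() (or any other Rx handler in this file)
 * indirectly through rte_eth_rx_burst().  The burst size and helper name
 * are illustrative assumptions; a real application would process the
 * packets instead of freeing them immediately.
 */
static __rte_unused void
ixgbe_doc_example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[32];
        uint16_t i, nb;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);
}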
1785
1786 uint16_t
1787 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1788                 uint16_t nb_pkts)
1789 {
1790         struct ixgbe_rx_queue *rxq;
1791         volatile union ixgbe_adv_rx_desc *rx_ring;
1792         volatile union ixgbe_adv_rx_desc *rxdp;
1793         struct ixgbe_rx_entry *sw_ring;
1794         struct ixgbe_rx_entry *rxe;
1795         struct rte_mbuf *rxm;
1796         struct rte_mbuf *nmb;
1797         union ixgbe_adv_rx_desc rxd;
1798         uint64_t dma_addr;
1799         uint32_t staterr;
1800         uint32_t pkt_info;
1801         uint16_t pkt_len;
1802         uint16_t rx_id;
1803         uint16_t nb_rx;
1804         uint16_t nb_hold;
1805         uint64_t pkt_flags;
1806         uint64_t vlan_flags;
1807
1808         nb_rx = 0;
1809         nb_hold = 0;
1810         rxq = rx_queue;
1811         rx_id = rxq->rx_tail;
1812         rx_ring = rxq->rx_ring;
1813         sw_ring = rxq->sw_ring;
1814         vlan_flags = rxq->vlan_flags;
1815         while (nb_rx < nb_pkts) {
1816                 /*
1817                  * The order of operations here is important as the DD status
1818                  * bit must not be read after any other descriptor fields.
1819                  * rx_ring and rxdp are pointing to volatile data so the order
1820                  * of accesses cannot be reordered by the compiler. If they were
1821                  * not volatile, they could be reordered which could lead to
1822                  * using invalid descriptor fields when read from rxd.
1823                  */
1824                 rxdp = &rx_ring[rx_id];
1825                 staterr = rxdp->wb.upper.status_error;
1826                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1827                         break;
1828                 rxd = *rxdp;
1829
1830                 /*
1831                  * End of packet.
1832                  *
1833                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1834                  * is likely to be invalid and to be dropped by the various
1835                  * validation checks performed by the network stack.
1836                  *
1837                  * Allocate a new mbuf to replenish the RX ring descriptor.
1838                  * If the allocation fails:
1839                  *    - arrange for that RX descriptor to be the first one
1840                  *      being parsed the next time the receive function is
1841                  *      invoked [on the same queue].
1842                  *
1843                  *    - Stop parsing the RX ring and return immediately.
1844                  *
1845                  * This policy does not drop the packet received in the RX
1846                  * descriptor for which the allocation of a new mbuf failed.
1847                  * Thus, it allows that packet to be retrieved later, once
1848                  * mbufs have been freed in the meantime.
1849                  * As a side effect, holding RX descriptors instead of
1850                  * systematically giving them back to the NIC may lead to
1851                  * RX ring exhaustion situations.
1852                  * However, the NIC can gracefully prevent such situations
1853                  * from happening by sending specific "back-pressure" flow
1854                  * control frames to its peer(s).
1855                  */
1856                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1857                            "ext_err_stat=0x%08x pkt_len=%u",
1858                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1859                            (unsigned) rx_id, (unsigned) staterr,
1860                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1861
1862                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1863                 if (nmb == NULL) {
1864                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1865                                    "queue_id=%u", (unsigned) rxq->port_id,
1866                                    (unsigned) rxq->queue_id);
1867                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1868                         break;
1869                 }
1870
1871                 nb_hold++;
1872                 rxe = &sw_ring[rx_id];
1873                 rx_id++;
1874                 if (rx_id == rxq->nb_rx_desc)
1875                         rx_id = 0;
1876
1877                 /* Prefetch next mbuf while processing current one. */
1878                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1879
1880                 /*
1881                  * When next RX descriptor is on a cache-line boundary,
1882                  * prefetch the next 4 RX descriptors and the next 8 pointers
1883                  * to mbufs.
1884                  */
1885                 if ((rx_id & 0x3) == 0) {
1886                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1887                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1888                 }
1889
1890                 rxm = rxe->mbuf;
1891                 rxe->mbuf = nmb;
1892                 dma_addr =
1893                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1894                 rxdp->read.hdr_addr = 0;
1895                 rxdp->read.pkt_addr = dma_addr;
1896
1897                 /*
1898                  * Initialize the returned mbuf.
1899                  * 1) setup generic mbuf fields:
1900                  *    - number of segments,
1901                  *    - next segment,
1902                  *    - packet length,
1903                  *    - RX port identifier.
1904                  * 2) integrate hardware offload data, if any:
1905                  *    - RSS flag & hash,
1906                  *    - IP checksum flag,
1907                  *    - VLAN TCI, if any,
1908                  *    - error flags.
1909                  */
1910                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1911                                       rxq->crc_len);
1912                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1913                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1914                 rxm->nb_segs = 1;
1915                 rxm->next = NULL;
1916                 rxm->pkt_len = pkt_len;
1917                 rxm->data_len = pkt_len;
1918                 rxm->port = rxq->port_id;
1919
1920                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1921                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1922                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1923
1924                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1925                 pkt_flags = pkt_flags |
1926                         rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
1927                                                    rxq->rx_udp_csum_zero_err);
1928                 pkt_flags = pkt_flags |
1929                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1930                 rxm->ol_flags = pkt_flags;
1931                 rxm->packet_type =
1932                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1933                                                        rxq->pkt_type_mask);
1934
1935                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1936                         rxm->hash.rss = rte_le_to_cpu_32(
1937                                                 rxd.wb.lower.hi_dword.rss);
1938                 else if (pkt_flags & PKT_RX_FDIR) {
1939                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1940                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1941                                         IXGBE_ATR_HASH_MASK;
1942                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1943                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1944                 }
1945                 /*
1946                  * Store the mbuf address into the next entry of the array
1947                  * of returned packets.
1948                  */
1949                 rx_pkts[nb_rx++] = rxm;
1950         }
1951         rxq->rx_tail = rx_id;
1952
1953         /*
1954          * If the number of free RX descriptors is greater than the RX free
1955          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1956          * register.
1957          * Update the RDT with the value of the last processed RX descriptor
1958          * minus 1, to guarantee that the RDT register is never equal to the
1959          * RDH register, which creates a "full" ring situation from the
1960          * hardware point of view...
1961          */
1962         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1963         if (nb_hold > rxq->rx_free_thresh) {
1964                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1965                            "nb_hold=%u nb_rx=%u",
1966                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1967                            (unsigned) rx_id, (unsigned) nb_hold,
1968                            (unsigned) nb_rx);
1969                 rx_id = (uint16_t) ((rx_id == 0) ?
1970                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1971                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1972                 nb_hold = 0;
1973         }
1974         rxq->nb_rx_hold = nb_hold;
1975         return nb_rx;
1976 }
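
/*
 * Illustrative sketch, not part of the upstream driver: the RSS hash stored
 * in mb->hash.rss by ixgbe_recv_pkts() above is only meaningful when
 * PKT_RX_RSS_HASH is set, and is typically used for software flow
 * distribution.  The helper name is an illustrative assumption.
 */
static __rte_unused uint32_t
ixgbe_doc_example_flow_bucket(const struct rte_mbuf *m, uint32_t nb_buckets)
{
        if (m->ol_flags & PKT_RX_RSS_HASH)
                return m->hash.rss % nb_buckets;

        /* no hash available; fall back to a single bucket */
        return 0;
}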
1977
1978 /**
1979  * Detect an RSC descriptor.
1980  */
1981 static inline uint32_t
1982 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1983 {
1984         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1985                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1986 }
1987
1988 /**
1989  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1990  *
1991  * Fill the following info in the HEAD buffer of the Rx cluster:
1992  *    - RX port identifier
1993  *    - hardware offload data, if any:
1994  *      - RSS flag & hash
1995  *      - IP checksum flag
1996  *      - VLAN TCI, if any
1997  *      - error flags
1998  * @head HEAD of the packet cluster
1999  * @desc HW descriptor to get data from
2000  * @rxq Pointer to the Rx queue
 * @staterr Status/error word read from the HW descriptor
2001  */
2002 static inline void
2003 ixgbe_fill_cluster_head_buf(
2004         struct rte_mbuf *head,
2005         union ixgbe_adv_rx_desc *desc,
2006         struct ixgbe_rx_queue *rxq,
2007         uint32_t staterr)
2008 {
2009         uint32_t pkt_info;
2010         uint64_t pkt_flags;
2011
2012         head->port = rxq->port_id;
2013
2014         /* The vlan_tci field is only valid when PKT_RX_VLAN is
2015          * set in the pkt_flags field.
2016          */
2017         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2018         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2019         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2020         pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2021                                                 rxq->rx_udp_csum_zero_err);
2022         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2023         head->ol_flags = pkt_flags;
2024         head->packet_type =
2025                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2026
2027         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2028                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2029         else if (pkt_flags & PKT_RX_FDIR) {
2030                 head->hash.fdir.hash =
2031                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2032                                                           & IXGBE_ATR_HASH_MASK;
2033                 head->hash.fdir.id =
2034                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2035         }
2036 }
2037
2038 /**
2039  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2040  *
2041  * @rx_queue Rx queue handle
2042  * @rx_pkts table of received packets
2043  * @nb_pkts size of rx_pkts table
2044  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2045  *
2046  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2047  * additional ring of ixgbe_scattered_rx_entry's that will hold the relevant RSC info.
2048  *
2049  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2050  * 1) When non-EOP RSC completion arrives:
2051  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2052  *       segment's data length.
2053  *    b) Set the "next" pointer of the current segment to point to the segment
2054  *       at the NEXTP index.
2055  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2056  *       in the sw_sc_ring.
2057  * 2) When EOP arrives we just update the cluster's total length and offload
2058  *    flags and deliver the cluster up to the upper layers. In our case - put it
2059  *    in the rx_pkts table.
2060  *
2061  * Returns the number of received packets/clusters (according to the "bulk
2062  * receive" interface).
2063  */
2064 static inline uint16_t
2065 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2066                     bool bulk_alloc)
2067 {
2068         struct ixgbe_rx_queue *rxq = rx_queue;
2069         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2070         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2071         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2072         uint16_t rx_id = rxq->rx_tail;
2073         uint16_t nb_rx = 0;
2074         uint16_t nb_hold = rxq->nb_rx_hold;
2075         uint16_t prev_id = rxq->rx_tail;
2076
2077         while (nb_rx < nb_pkts) {
2078                 bool eop;
2079                 struct ixgbe_rx_entry *rxe;
2080                 struct ixgbe_scattered_rx_entry *sc_entry;
2081                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2082                 struct ixgbe_rx_entry *next_rxe = NULL;
2083                 struct rte_mbuf *first_seg;
2084                 struct rte_mbuf *rxm;
2085                 struct rte_mbuf *nmb = NULL;
2086                 union ixgbe_adv_rx_desc rxd;
2087                 uint16_t data_len;
2088                 uint16_t next_id;
2089                 volatile union ixgbe_adv_rx_desc *rxdp;
2090                 uint32_t staterr;
2091
2092 next_desc:
2093                 /*
2094                  * The code in this whole file uses the volatile pointer to
2095                  * ensure the read ordering of the status and the rest of the
2096                  * descriptor fields (on the compiler level only!!!). This is so
2097                  * UGLY - why not to just use the compiler barrier instead? DPDK
2098                  * even has the rte_compiler_barrier() for that.
2099                  * UGLY - why not just use the compiler barrier instead? DPDK
2100                  * But most importantly this is just wrong because this doesn't
2101                  * ensure memory ordering in a general case at all. For
2102                  * instance, DPDK is supposed to work on Power CPUs where
2103                  * compiler barrier may just not be enough!
2104                  *
2105                  * I tried to write only this function properly to have a
2106                  * starting point (as a part of an LRO/RSC series) but the
2107                  * compiler cursed at me when I tried to cast away the
2108                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2109                  * keeping it the way it is for now.
2110                  *
2111                  * The code in this file is broken in so many other places and
2112                  * will just not work on a big endian CPU anyway therefore the
2113                  * lines below will have to be revisited together with the rest
2114                  * of the ixgbe PMD.
2115                  *
2116                  * TODO:
2117                  *    - Get rid of "volatile" and let the compiler do its job.
2118                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2119                  *      memory ordering below.
2120                  */
2121                 rxdp = &rx_ring[rx_id];
2122                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2123
2124                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2125                         break;
2126
2127                 rxd = *rxdp;
2128
2129                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2130                                   "staterr=0x%x data_len=%u",
2131                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2132                            rte_le_to_cpu_16(rxd.wb.upper.length));
2133
2134                 if (!bulk_alloc) {
2135                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2136                         if (nmb == NULL) {
2137                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2138                                                   "port_id=%u queue_id=%u",
2139                                            rxq->port_id, rxq->queue_id);
2140
2141                                 rte_eth_devices[rxq->port_id].data->
2142                                                         rx_mbuf_alloc_failed++;
2143                                 break;
2144                         }
2145                 } else if (nb_hold > rxq->rx_free_thresh) {
2146                         uint16_t next_rdt = rxq->rx_free_trigger;
2147
2148                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2149                                 rte_wmb();
2150                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2151                                                         rxq->rdt_reg_addr,
2152                                                         next_rdt);
2153                                 nb_hold -= rxq->rx_free_thresh;
2154                         } else {
2155                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2156                                                   "port_id=%u queue_id=%u",
2157                                            rxq->port_id, rxq->queue_id);
2158
2159                                 rte_eth_devices[rxq->port_id].data->
2160                                                         rx_mbuf_alloc_failed++;
2161                                 break;
2162                         }
2163                 }
2164
2165                 nb_hold++;
2166                 rxe = &sw_ring[rx_id];
2167                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2168
2169                 next_id = rx_id + 1;
2170                 if (next_id == rxq->nb_rx_desc)
2171                         next_id = 0;
2172
2173                 /* Prefetch next mbuf while processing current one. */
2174                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2175
2176                 /*
2177                  * When next RX descriptor is on a cache-line boundary,
2178                  * prefetch the next 4 RX descriptors and the next 8 pointers
2179                  * to mbufs.
2180                  */
2181                 if ((next_id & 0x3) == 0) {
2182                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2183                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2184                 }
2185
2186                 rxm = rxe->mbuf;
2187
2188                 if (!bulk_alloc) {
2189                         __le64 dma =
2190                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2191                         /*
2192                          * Update RX descriptor with the physical address of the
2193                          * new data buffer of the new allocated mbuf.
2194                          */
2195                         rxe->mbuf = nmb;
2196
2197                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2198                         rxdp->read.hdr_addr = 0;
2199                         rxdp->read.pkt_addr = dma;
2200                 } else
2201                         rxe->mbuf = NULL;
2202
2203                 /*
2204                  * Set data length & data buffer address of mbuf.
2205                  */
2206                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2207                 rxm->data_len = data_len;
2208
2209                 if (!eop) {
2210                         uint16_t nextp_id;
2211                         /*
2212                          * Get next descriptor index:
2213                          *  - For RSC it's in the NEXTP field.
2214                          *  - For a scattered packet - it's just a following
2215                          *    descriptor.
2216                          */
2217                         if (ixgbe_rsc_count(&rxd))
2218                                 nextp_id =
2219                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2220                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2221                         else
2222                                 nextp_id = next_id;
2223
2224                         next_sc_entry = &sw_sc_ring[nextp_id];
2225                         next_rxe = &sw_ring[nextp_id];
2226                         rte_ixgbe_prefetch(next_rxe);
2227                 }
2228
2229                 sc_entry = &sw_sc_ring[rx_id];
2230                 first_seg = sc_entry->fbuf;
2231                 sc_entry->fbuf = NULL;
2232
2233                 /*
2234                  * If this is the first buffer of the received packet,
2235                  * set the pointer to the first mbuf of the packet and
2236                  * initialize its context.
2237                  * Otherwise, update the total length and the number of segments
2238                  * of the current scattered packet, and update the pointer to
2239                  * the last mbuf of the current packet.
2240                  */
2241                 if (first_seg == NULL) {
2242                         first_seg = rxm;
2243                         first_seg->pkt_len = data_len;
2244                         first_seg->nb_segs = 1;
2245                 } else {
2246                         first_seg->pkt_len += data_len;
2247                         first_seg->nb_segs++;
2248                 }
2249
2250                 prev_id = rx_id;
2251                 rx_id = next_id;
2252
2253                 /*
2254                  * If this is not the last buffer of the received packet, update
2255                  * the pointer to the first mbuf at the NEXTP entry in the
2256                  * sw_sc_ring and continue to parse the RX ring.
2257                  */
2258                 if (!eop && next_rxe) {
2259                         rxm->next = next_rxe->mbuf;
2260                         next_sc_entry->fbuf = first_seg;
2261                         goto next_desc;
2262                 }
2263
2264                 /* Initialize the first mbuf of the returned packet */
2265                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2266
2267                 /*
2268                  * Deal with the case when HW CRC strip is disabled.
2269                  * That can't happen when LRO is enabled, but still could
2270                  * happen for scattered RX mode.
2271                  */
2272                 first_seg->pkt_len -= rxq->crc_len;
2273                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2274                         struct rte_mbuf *lp;
2275
2276                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2277                                 ;
2278
2279                         first_seg->nb_segs--;
2280                         lp->data_len -= rxq->crc_len - rxm->data_len;
2281                         lp->next = NULL;
2282                         rte_pktmbuf_free_seg(rxm);
2283                 } else
2284                         rxm->data_len -= rxq->crc_len;
2285
2286                 /* Prefetch data of first segment, if configured to do so. */
2287                 rte_packet_prefetch((char *)first_seg->buf_addr +
2288                         first_seg->data_off);
2289
2290                 /*
2291                  * Store the mbuf address into the next entry of the array
2292                  * of returned packets.
2293                  */
2294                 rx_pkts[nb_rx++] = first_seg;
2295         }
2296
2297         /*
2298          * Record index of the next RX descriptor to probe.
2299          */
2300         rxq->rx_tail = rx_id;
2301
2302         /*
2303          * If the number of free RX descriptors is greater than the RX free
2304          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2305          * register.
2306          * Update the RDT with the value of the last processed RX descriptor
2307          * minus 1, to guarantee that the RDT register is never equal to the
2308          * RDH register, which creates a "full" ring situation from the
2309          * hardware point of view...
2310          */
2311         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2312                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2313                            "nb_hold=%u nb_rx=%u",
2314                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2315
2316                 rte_wmb();
2317                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2318                 nb_hold = 0;
2319         }
2320
2321         rxq->nb_rx_hold = nb_hold;
2322         return nb_rx;
2323 }
2324
2325 uint16_t
2326 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2327                                  uint16_t nb_pkts)
2328 {
2329         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2330 }
2331
2332 uint16_t
2333 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2334                                uint16_t nb_pkts)
2335 {
2336         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2337 }
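
/*
 * Illustrative sketch, not part of the upstream driver: the LRO receive
 * handlers above are only selected when the application enables the TCP
 * LRO offload at configure time.  The 9728-byte aggregate size, the queue
 * counts and the helper name are illustrative assumptions.
 */
static __rte_unused int
ixgbe_doc_example_enable_lro(uint16_t port_id, uint16_t nb_rxq,
                             uint16_t nb_txq)
{
        struct rte_eth_conf conf;

        memset(&conf, 0, sizeof(conf));
        conf.rxmode.offloads = DEV_RX_OFFLOAD_TCP_LRO;
        conf.rxmode.max_lro_pkt_size = 9728;

        return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}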
2338
2339 /*********************************************************************
2340  *
2341  *  Queue management functions
2342  *
2343  **********************************************************************/
2344
2345 static void __rte_cold
2346 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2347 {
2348         unsigned i;
2349
2350         if (txq->sw_ring != NULL) {
2351                 for (i = 0; i < txq->nb_tx_desc; i++) {
2352                         if (txq->sw_ring[i].mbuf != NULL) {
2353                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2354                                 txq->sw_ring[i].mbuf = NULL;
2355                         }
2356                 }
2357         }
2358 }
2359
2360 static int
2361 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2362 {
2363         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2364         uint16_t i, tx_last, tx_id;
2365         uint16_t nb_tx_free_last;
2366         uint16_t nb_tx_to_clean;
2367         uint32_t pkt_cnt;
2368
2369         /* Start freeing mbufs from the descriptor following tx_tail */
2370         tx_last = txq->tx_tail;
2371         tx_id  = swr_ring[tx_last].next_id;
2372
2373         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2374                 return 0;
2375
2376         nb_tx_to_clean = txq->nb_tx_free;
2377         nb_tx_free_last = txq->nb_tx_free;
2378         if (!free_cnt)
2379                 free_cnt = txq->nb_tx_desc;
2380
2381         /* Loop through swr_ring to count the number of
2382          * freeable mbufs and packets.
2383          */
2384         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2385                 for (i = 0; i < nb_tx_to_clean &&
2386                         pkt_cnt < free_cnt &&
2387                         tx_id != tx_last; i++) {
2388                         if (swr_ring[tx_id].mbuf != NULL) {
2389                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2390                                 swr_ring[tx_id].mbuf = NULL;
2391
2392                                 /*
2393                                  * last segment in the packet,
2394                                  * increment packet count
2395                                  */
2396                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2397                         }
2398
2399                         tx_id = swr_ring[tx_id].next_id;
2400                 }
2401
2402                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2403                         txq->nb_tx_free || tx_id == tx_last)
2404                         break;
2405
2406                 if (pkt_cnt < free_cnt) {
2407                         if (ixgbe_xmit_cleanup(txq))
2408                                 break;
2409
2410                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2411                         nb_tx_free_last = txq->nb_tx_free;
2412                 }
2413         }
2414
2415         return (int)pkt_cnt;
2416 }
2417
2418 static int
2419 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2420                         uint32_t free_cnt)
2421 {
2422         int i, n, cnt;
2423
2424         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2425                 free_cnt = txq->nb_tx_desc;
2426
2427         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2428
2429         for (i = 0; i < cnt; i += n) {
2430                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2431                         break;
2432
2433                 n = ixgbe_tx_free_bufs(txq);
2434
2435                 if (n == 0)
2436                         break;
2437         }
2438
2439         return i;
2440 }
2441
2442 static int
2443 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2444                         uint32_t free_cnt __rte_unused)
2445 {
2446         return -ENOTSUP;
2447 }
2448
2449 int
2450 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2451 {
2452         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2453         if (txq->offloads == 0 &&
2454 #ifdef RTE_LIB_SECURITY
2455                         !(txq->using_ipsec) &&
2456 #endif
2457                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2458                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2459                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2460                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2461                                         txq->sw_ring_v != NULL)) {
2462                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2463                 } else {
2464                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2465                 }
2466         }
2467
2468         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2469 }
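
/*
 * Illustrative sketch, not part of the upstream driver:
 * ixgbe_dev_tx_done_cleanup() above is reached through the generic
 * rte_eth_tx_done_cleanup() API, typically when an application wants its
 * mbufs back before the Tx ring wraps.  The helper name is an illustrative
 * assumption.
 */
static __rte_unused int
ixgbe_doc_example_reclaim_tx(uint16_t port_id, uint16_t queue_id)
{
        /* free_cnt == 0 requests freeing as many done mbufs as possible */
        return rte_eth_tx_done_cleanup(port_id, queue_id, 0);
}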
2470
2471 static void __rte_cold
2472 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2473 {
2474         if (txq != NULL &&
2475             txq->sw_ring != NULL)
2476                 rte_free(txq->sw_ring);
2477 }
2478
2479 static void __rte_cold
2480 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2481 {
2482         if (txq != NULL && txq->ops != NULL) {
2483                 txq->ops->release_mbufs(txq);
2484                 txq->ops->free_swring(txq);
2485                 rte_memzone_free(txq->mz);
2486                 rte_free(txq);
2487         }
2488 }
2489
2490 void __rte_cold
2491 ixgbe_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2492 {
2493         ixgbe_tx_queue_release(dev->data->tx_queues[qid]);
2494 }
2495
2496 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2497 static void __rte_cold
2498 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2499 {
2500         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2501         struct ixgbe_tx_entry *txe = txq->sw_ring;
2502         uint16_t prev, i;
2503
2504         /* Zero out HW ring memory */
2505         for (i = 0; i < txq->nb_tx_desc; i++) {
2506                 txq->tx_ring[i] = zeroed_desc;
2507         }
2508
2509         /* Initialize SW ring entries */
2510         prev = (uint16_t) (txq->nb_tx_desc - 1);
2511         for (i = 0; i < txq->nb_tx_desc; i++) {
2512                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2513
2514                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2515                 txe[i].mbuf = NULL;
2516                 txe[i].last_id = i;
2517                 txe[prev].next_id = i;
2518                 prev = i;
2519         }
2520
2521         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2522         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2523
2524         txq->tx_tail = 0;
2525         txq->nb_tx_used = 0;
2526         /*
2527          * Always allow 1 descriptor to be un-allocated to avoid
2528          * a H/W race condition
2529          */
2530         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2531         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2532         txq->ctx_curr = 0;
2533         memset((void *)&txq->ctx_cache, 0,
2534                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2535 }
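/*
 * Worked example of the sw_ring linkage built above (illustrative only):
 * with nb_tx_desc = 4 the loop produces
 *
 *     entry:   0  1  2  3
 *     last_id: 0  1  2  3
 *     next_id: 1  2  3  0
 *
 * i.e. next_id forms a circular list over the ring, and every descriptor has
 * its DD bit set so the first transmissions see the whole ring as free.
 */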
2536
2537 static const struct ixgbe_txq_ops def_txq_ops = {
2538         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2539         .free_swring = ixgbe_tx_free_swring,
2540         .reset = ixgbe_reset_tx_queue,
2541 };
2542
2543 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2544  * the queue parameters. Used in tx_queue_setup by primary process and then
2545  * in dev_init by secondary process when attaching to an existing ethdev.
2546  */
2547 void __rte_cold
2548 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2549 {
2550         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2551         if ((txq->offloads == 0) &&
2552 #ifdef RTE_LIB_SECURITY
2553                         !(txq->using_ipsec) &&
2554 #endif
2555                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2556                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2557                 dev->tx_pkt_prepare = NULL;
2558                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2559                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2560                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2561                                         ixgbe_txq_vec_setup(txq) == 0)) {
2562                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2563                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2564                 } else
2565                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2566         } else {
2567                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2568                 PMD_INIT_LOG(DEBUG,
2569                                 " - offloads = 0x%" PRIx64,
2570                                 txq->offloads);
2571                 PMD_INIT_LOG(DEBUG,
2572                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2573                                 (unsigned long)txq->tx_rs_thresh,
2574                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2575                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2576                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2577         }
2578 }
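/*
 * Illustrative sketch (not part of the driver): the Tx path chosen above is
 * driven entirely by the queue configuration. Assuming driver defaults
 * elsewhere, a configuration such as
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *             .offloads = 0,
 *     };
 *     rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 *
 * selects the simple path (and the vector path when SIMD >= 128 bit is
 * available), because it requests no offloads and a sufficiently large
 * tx_rs_thresh. Requesting any Tx offload or a small tx_rs_thresh falls
 * back to the full-featured ixgbe_xmit_pkts()/ixgbe_prep_pkts() pair.
 */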
2579
2580 uint64_t
2581 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2582 {
2583         RTE_SET_USED(dev);
2584
2585         return 0;
2586 }
2587
2588 uint64_t
2589 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2590 {
2591         uint64_t tx_offload_capa;
2592         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2593
2594         tx_offload_capa =
2595                 RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
2596                 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM  |
2597                 RTE_ETH_TX_OFFLOAD_UDP_CKSUM   |
2598                 RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
2599                 RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
2600                 RTE_ETH_TX_OFFLOAD_TCP_TSO     |
2601                 RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2602
2603         if (hw->mac.type == ixgbe_mac_82599EB ||
2604             hw->mac.type == ixgbe_mac_X540)
2605                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_MACSEC_INSERT;
2606
2607         if (hw->mac.type == ixgbe_mac_X550 ||
2608             hw->mac.type == ixgbe_mac_X550EM_x ||
2609             hw->mac.type == ixgbe_mac_X550EM_a)
2610                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2611
2612 #ifdef RTE_LIB_SECURITY
2613         if (dev->security_ctx)
2614                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_SECURITY;
2615 #endif
2616         return tx_offload_capa;
2617 }
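/*
 * Illustrative sketch (not part of the driver): the capability mask built
 * above is what applications see via rte_eth_dev_info_get(), e.g. to check
 * for TSO support before enabling it ("port_id" and "tx_offloads" are the
 * caller's own):
 *
 *     struct rte_eth_dev_info dev_info;
 *
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO)
 *             tx_offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
 */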
2618
2619 int __rte_cold
2620 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2621                          uint16_t queue_idx,
2622                          uint16_t nb_desc,
2623                          unsigned int socket_id,
2624                          const struct rte_eth_txconf *tx_conf)
2625 {
2626         const struct rte_memzone *tz;
2627         struct ixgbe_tx_queue *txq;
2628         struct ixgbe_hw     *hw;
2629         uint16_t tx_rs_thresh, tx_free_thresh;
2630         uint64_t offloads;
2631
2632         PMD_INIT_FUNC_TRACE();
2633         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2634
2635         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2636
2637         /*
2638          * Validate number of transmit descriptors.
2639          * It must not exceed hardware maximum, and must be multiple
2640          * of IXGBE_ALIGN.
2641          */
2642         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2643                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2644                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2645                 return -EINVAL;
2646         }
2647
2648         /*
2649          * The following two parameters control the setting of the RS bit on
2650          * transmit descriptors.
2651          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2652          * descriptors have been used.
2653          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2654          * descriptors are used or if the number of descriptors required
2655          * to transmit a packet is greater than the number of free TX
2656          * descriptors.
2657          * The following constraints must be satisfied:
2658          *  tx_rs_thresh must be greater than 0.
2659          *  tx_rs_thresh must be less than the size of the ring minus 2.
2660          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2661          *  tx_rs_thresh must be a divisor of the ring size.
2662          *  tx_free_thresh must be greater than 0.
2663          *  tx_free_thresh must be less than the size of the ring minus 3.
2664          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2665          * One descriptor in the TX ring is used as a sentinel to avoid a
2666          * H/W race condition, hence the maximum threshold constraints.
2667          * When set to zero use default values.
2668          */
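        /*
         * Worked example of the constraints above (illustrative only): with
         * nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 32 every
         * check passes: 32 > 0, 32 < 510, 32 <= 32, 512 % 32 == 0,
         * 32 < 509 and 32 + 32 <= 512.
         */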
2669         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2670                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2671         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2672         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2673                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2674         if (tx_conf->tx_rs_thresh > 0)
2675                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2676         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2677                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2678                              "exceed nb_desc. (tx_rs_thresh=%u "
2679                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2680                              (unsigned int)tx_rs_thresh,
2681                              (unsigned int)tx_free_thresh,
2682                              (unsigned int)nb_desc,
2683                              (int)dev->data->port_id,
2684                              (int)queue_idx);
2685                 return -(EINVAL);
2686         }
2687         if (tx_rs_thresh >= (nb_desc - 2)) {
2688                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2689                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2690                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2691                         (int)dev->data->port_id, (int)queue_idx);
2692                 return -(EINVAL);
2693         }
2694         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2695                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2696                         "(tx_rs_thresh=%u port=%d queue=%d)",
2697                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2698                         (int)dev->data->port_id, (int)queue_idx);
2699                 return -(EINVAL);
2700         }
2701         if (tx_free_thresh >= (nb_desc - 3)) {
2702                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2703                              "number of TX descriptors minus 3. "
2704                              "(tx_free_thresh=%u "
2705                              "port=%d queue=%d)",
2706                              (unsigned int)tx_free_thresh,
2707                              (int)dev->data->port_id, (int)queue_idx);
2708                 return -(EINVAL);
2709         }
2710         if (tx_rs_thresh > tx_free_thresh) {
2711                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2712                              "tx_free_thresh. (tx_free_thresh=%u "
2713                              "tx_rs_thresh=%u port=%d queue=%d)",
2714                              (unsigned int)tx_free_thresh,
2715                              (unsigned int)tx_rs_thresh,
2716                              (int)dev->data->port_id,
2717                              (int)queue_idx);
2718                 return -(EINVAL);
2719         }
2720         if ((nb_desc % tx_rs_thresh) != 0) {
2721                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2722                              "number of TX descriptors. (tx_rs_thresh=%u "
2723                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2724                              (int)dev->data->port_id, (int)queue_idx);
2725                 return -(EINVAL);
2726         }
2727
2728         /*
2729          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2730          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2731          * by the NIC and all descriptors are written back after the NIC
2732          * accumulates WTHRESH descriptors.
2733          */
2734         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2735                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2736                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2737                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2738                              (int)dev->data->port_id, (int)queue_idx);
2739                 return -(EINVAL);
2740         }
2741
2742         /* Free memory prior to re-allocation if needed... */
2743         if (dev->data->tx_queues[queue_idx] != NULL) {
2744                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2745                 dev->data->tx_queues[queue_idx] = NULL;
2746         }
2747
2748         /* First allocate the tx queue data structure */
2749         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2750                                  RTE_CACHE_LINE_SIZE, socket_id);
2751         if (txq == NULL)
2752                 return -ENOMEM;
2753
2754         /*
2755          * Allocate TX ring hardware descriptors. A memzone large enough to
2756          * handle the maximum ring size is allocated in order to allow for
2757          * resizing in later calls to the queue setup function.
2758          */
2759         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2760                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2761                         IXGBE_ALIGN, socket_id);
2762         if (tz == NULL) {
2763                 ixgbe_tx_queue_release(txq);
2764                 return -ENOMEM;
2765         }
2766
2767         txq->mz = tz;
2768         txq->nb_tx_desc = nb_desc;
2769         txq->tx_rs_thresh = tx_rs_thresh;
2770         txq->tx_free_thresh = tx_free_thresh;
2771         txq->pthresh = tx_conf->tx_thresh.pthresh;
2772         txq->hthresh = tx_conf->tx_thresh.hthresh;
2773         txq->wthresh = tx_conf->tx_thresh.wthresh;
2774         txq->queue_id = queue_idx;
2775         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2776                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2777         txq->port_id = dev->data->port_id;
2778         txq->offloads = offloads;
2779         txq->ops = &def_txq_ops;
2780         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2781 #ifdef RTE_LIB_SECURITY
2782         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2783                         RTE_ETH_TX_OFFLOAD_SECURITY);
2784 #endif
2785
2786         /*
2787          * Modification to set VFTDT for virtual function if vf is detected
2788          */
2789         if (hw->mac.type == ixgbe_mac_82599_vf ||
2790             hw->mac.type == ixgbe_mac_X540_vf ||
2791             hw->mac.type == ixgbe_mac_X550_vf ||
2792             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2793             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2794                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2795         else
2796                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2797
2798         txq->tx_ring_phys_addr = tz->iova;
2799         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2800
2801         /* Allocate software ring */
2802         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2803                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2804                                 RTE_CACHE_LINE_SIZE, socket_id);
2805         if (txq->sw_ring == NULL) {
2806                 ixgbe_tx_queue_release(txq);
2807                 return -ENOMEM;
2808         }
2809         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2810                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2811
2812         /* set up vector or scalar TX function as appropriate */
2813         ixgbe_set_tx_function(dev, txq);
2814
2815         txq->ops->reset(txq);
2816
2817         dev->data->tx_queues[queue_idx] = txq;
2818
2819
2820         return 0;
2821 }
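/*
 * Illustrative sketch (not part of the driver): a typical caller reaches
 * this setup function through the ethdev API after rte_eth_dev_configure(),
 * e.g. one 512-descriptor Tx queue on the port's NUMA socket with the
 * default thresholds:
 *
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512,
 *                     rte_eth_dev_socket_id(port_id), NULL);
 *     if (ret != 0)
 *             printf("Tx queue setup failed: %d\n", ret);
 *
 * Passing NULL for tx_conf uses the default_txconf reported by
 * rte_eth_dev_info_get(); the threshold checks above then apply to those
 * defaults.
 */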
2822
2823 /**
2824  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2825  *
2826  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2827  * in the sw_rsc_ring is not set to NULL but rather points to the next
2828  * mbuf of this RSC aggregation (that has not been completed yet and still
2829  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2830  * just free the first "nb_segs" segments of the cluster explicitly by calling
2831  * rte_pktmbuf_free_seg() on each of them.
2832  *
2833  * @m scattered cluster head
2834  */
2835 static void __rte_cold
2836 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2837 {
2838         uint16_t i, nb_segs = m->nb_segs;
2839         struct rte_mbuf *next_seg;
2840
2841         for (i = 0; i < nb_segs; i++) {
2842                 next_seg = m->next;
2843                 rte_pktmbuf_free_seg(m);
2844                 m = next_seg;
2845         }
2846 }
2847
2848 static void __rte_cold
2849 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2850 {
2851         unsigned i;
2852
2853         /* SSE Vector driver has a different way of releasing mbufs. */
2854         if (rxq->rx_using_sse) {
2855                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2856                 return;
2857         }
2858
2859         if (rxq->sw_ring != NULL) {
2860                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2861                         if (rxq->sw_ring[i].mbuf != NULL) {
2862                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2863                                 rxq->sw_ring[i].mbuf = NULL;
2864                         }
2865                 }
2866                 if (rxq->rx_nb_avail) {
2867                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2868                                 struct rte_mbuf *mb;
2869
2870                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2871                                 rte_pktmbuf_free_seg(mb);
2872                         }
2873                         rxq->rx_nb_avail = 0;
2874                 }
2875         }
2876
2877         if (rxq->sw_sc_ring)
2878                 for (i = 0; i < rxq->nb_rx_desc; i++)
2879                         if (rxq->sw_sc_ring[i].fbuf) {
2880                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2881                                 rxq->sw_sc_ring[i].fbuf = NULL;
2882                         }
2883 }
2884
2885 static void __rte_cold
2886 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2887 {
2888         if (rxq != NULL) {
2889                 ixgbe_rx_queue_release_mbufs(rxq);
2890                 rte_free(rxq->sw_ring);
2891                 rte_free(rxq->sw_sc_ring);
2892                 rte_memzone_free(rxq->mz);
2893                 rte_free(rxq);
2894         }
2895 }
2896
2897 void __rte_cold
2898 ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2899 {
2900         ixgbe_rx_queue_release(dev->data->rx_queues[qid]);
2901 }
2902
2903 /*
2904  * Check if Rx Burst Bulk Alloc function can be used.
2905  * Return
2906  *        0: the preconditions are satisfied and the bulk allocation function
2907  *           can be used.
2908  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2909  *           function must be used.
2910  */
2911 static inline int __rte_cold
2912 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2913 {
2914         int ret = 0;
2915
2916         /*
2917          * Make sure the following pre-conditions are satisfied:
2918          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2919          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2920          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2921          * Scattered packets are not supported.  This should be checked
2922          * outside of this function.
2923          */
2924         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2925                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2926                              "rxq->rx_free_thresh=%d, "
2927                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2928                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2929                 ret = -EINVAL;
2930         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2931                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2932                              "rxq->rx_free_thresh=%d, "
2933                              "rxq->nb_rx_desc=%d",
2934                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2935                 ret = -EINVAL;
2936         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2937                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2938                              "rxq->nb_rx_desc=%d, "
2939                              "rxq->rx_free_thresh=%d",
2940                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2941                 ret = -EINVAL;
2942         }
2943
2944         return ret;
2945 }
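/*
 * Worked example (illustrative only): assuming RTE_PMD_IXGBE_RX_MAX_BURST
 * keeps its usual value of 32, a queue with nb_rx_desc = 512 and
 * rx_free_thresh = 32 satisfies all three preconditions above
 * (32 >= 32, 32 < 512, 512 % 32 == 0), whereas rx_free_thresh = 0 or a
 * threshold that does not divide the ring size forces the whole port back
 * to the non-bulk Rx burst functions.
 */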
2946
2947 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2948 static void __rte_cold
2949 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2950 {
2951         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2952         unsigned i;
2953         uint16_t len = rxq->nb_rx_desc;
2954
2955         /*
2956          * By default, the Rx queue setup function allocates enough memory for
2957          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2958          * extra memory at the end of the descriptor ring to be zero'd out.
2959          */
2960         if (adapter->rx_bulk_alloc_allowed)
2961                 /* zero out extra memory */
2962                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2963
2964         /*
2965          * Zero out HW ring memory. Zero out extra memory at the end of
2966          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2967          * reads extra memory as zeros.
2968          */
2969         for (i = 0; i < len; i++) {
2970                 rxq->rx_ring[i] = zeroed_desc;
2971         }
2972
2973         /*
2974          * initialize extra software ring entries. Space for these extra
2975          * entries is always allocated
2976          */
2977         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2978         for (i = rxq->nb_rx_desc; i < len; ++i) {
2979                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2980         }
2981
2982         rxq->rx_nb_avail = 0;
2983         rxq->rx_next_avail = 0;
2984         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2985         rxq->rx_tail = 0;
2986         rxq->nb_rx_hold = 0;
2987
2988         if (rxq->pkt_first_seg != NULL)
2989                 rte_pktmbuf_free(rxq->pkt_first_seg);
2990
2991         rxq->pkt_first_seg = NULL;
2992         rxq->pkt_last_seg = NULL;
2993
2994 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2995         rxq->rxrearm_start = 0;
2996         rxq->rxrearm_nb = 0;
2997 #endif
2998 }
2999
3000 static int
3001 ixgbe_is_vf(struct rte_eth_dev *dev)
3002 {
3003         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3004
3005         switch (hw->mac.type) {
3006         case ixgbe_mac_82599_vf:
3007         case ixgbe_mac_X540_vf:
3008         case ixgbe_mac_X550_vf:
3009         case ixgbe_mac_X550EM_x_vf:
3010         case ixgbe_mac_X550EM_a_vf:
3011                 return 1;
3012         default:
3013                 return 0;
3014         }
3015 }
3016
3017 uint64_t
3018 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3019 {
3020         uint64_t offloads = 0;
3021         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3022
3023         if (hw->mac.type != ixgbe_mac_82598EB)
3024                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3025
3026         return offloads;
3027 }
3028
3029 uint64_t
3030 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3031 {
3032         uint64_t offloads;
3033         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3034
3035         offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM  |
3036                    RTE_ETH_RX_OFFLOAD_UDP_CKSUM   |
3037                    RTE_ETH_RX_OFFLOAD_TCP_CKSUM   |
3038                    RTE_ETH_RX_OFFLOAD_KEEP_CRC    |
3039                    RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
3040                    RTE_ETH_RX_OFFLOAD_SCATTER |
3041                    RTE_ETH_RX_OFFLOAD_RSS_HASH;
3042
3043         if (hw->mac.type == ixgbe_mac_82598EB)
3044                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3045
3046         if (ixgbe_is_vf(dev) == 0)
3047                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_EXTEND;
3048
3049         /*
3050          * RSC (hardware LRO) is only supported by 82599, X540 and X550 PF
3051          * devices in non-SR-IOV mode.
3052          */
3053         if ((hw->mac.type == ixgbe_mac_82599EB ||
3054              hw->mac.type == ixgbe_mac_X540 ||
3055              hw->mac.type == ixgbe_mac_X550) &&
3056             !RTE_ETH_DEV_SRIOV(dev).active)
3057                 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3058
3059         if (hw->mac.type == ixgbe_mac_82599EB ||
3060             hw->mac.type == ixgbe_mac_X540)
3061                 offloads |= RTE_ETH_RX_OFFLOAD_MACSEC_STRIP;
3062
3063         if (hw->mac.type == ixgbe_mac_X550 ||
3064             hw->mac.type == ixgbe_mac_X550EM_x ||
3065             hw->mac.type == ixgbe_mac_X550EM_a)
3066                 offloads |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3067
3068 #ifdef RTE_LIB_SECURITY
3069         if (dev->security_ctx)
3070                 offloads |= RTE_ETH_RX_OFFLOAD_SECURITY;
3071 #endif
3072
3073         return offloads;
3074 }
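/*
 * Illustrative sketch (not part of the driver): because TCP_LRO, MACsec and
 * outer IPv4 checksum are only advertised for some MAC types, portable
 * applications should gate such offloads on the reported capabilities
 * ("port_id" and "rx_offloads" are the caller's own):
 *
 *     struct rte_eth_dev_info dev_info;
 *     uint64_t rx_offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
 *
 *     rte_eth_dev_info_get(port_id, &dev_info);
 *     if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
 *             rx_offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
 */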
3075
3076 int __rte_cold
3077 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3078                          uint16_t queue_idx,
3079                          uint16_t nb_desc,
3080                          unsigned int socket_id,
3081                          const struct rte_eth_rxconf *rx_conf,
3082                          struct rte_mempool *mp)
3083 {
3084         const struct rte_memzone *rz;
3085         struct ixgbe_rx_queue *rxq;
3086         struct ixgbe_hw     *hw;
3087         uint16_t len;
3088         struct ixgbe_adapter *adapter = dev->data->dev_private;
3089         uint64_t offloads;
3090
3091         PMD_INIT_FUNC_TRACE();
3092         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3093
3094         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3095
3096         /*
3097          * Validate number of receive descriptors.
3098          * It must not exceed hardware maximum, and must be multiple
3099          * of IXGBE_ALIGN.
3100          */
3101         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3102                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3103                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3104                 return -EINVAL;
3105         }
3106
3107         /* Free memory prior to re-allocation if needed... */
3108         if (dev->data->rx_queues[queue_idx] != NULL) {
3109                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3110                 dev->data->rx_queues[queue_idx] = NULL;
3111         }
3112
3113         /* First allocate the rx queue data structure */
3114         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3115                                  RTE_CACHE_LINE_SIZE, socket_id);
3116         if (rxq == NULL)
3117                 return -ENOMEM;
3118         rxq->mb_pool = mp;
3119         rxq->nb_rx_desc = nb_desc;
3120         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3121         rxq->queue_id = queue_idx;
3122         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3123                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3124         rxq->port_id = dev->data->port_id;
3125         if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
3126                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3127         else
3128                 rxq->crc_len = 0;
3129         rxq->drop_en = rx_conf->rx_drop_en;
3130         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3131         rxq->offloads = offloads;
3132
3133         /*
3134          * The packet type field in the RX descriptor differs between NICs:
3135          * some bits are used by x550 but reserved on other devices,
3136          * so set a per-NIC packet type mask.
3137          */
3138         if (hw->mac.type == ixgbe_mac_X550 ||
3139             hw->mac.type == ixgbe_mac_X550EM_x ||
3140             hw->mac.type == ixgbe_mac_X550EM_a ||
3141             hw->mac.type == ixgbe_mac_X550_vf ||
3142             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3143             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3144                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3145         else
3146                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3147
3148         /*
3149          * 82599 errata, UDP frames with a 0 checksum can be marked as checksum
3150          * errors.
3151          */
3152         if (hw->mac.type == ixgbe_mac_82599EB)
3153                 rxq->rx_udp_csum_zero_err = 1;
3154
3155         /*
3156          * Allocate RX ring hardware descriptors. A memzone large enough to
3157          * handle the maximum ring size is allocated in order to allow for
3158          * resizing in later calls to the queue setup function.
3159          */
3160         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3161                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3162         if (rz == NULL) {
3163                 ixgbe_rx_queue_release(rxq);
3164                 return -ENOMEM;
3165         }
3166
3167         rxq->mz = rz;
3168         /*
3169          * Zero init all the descriptors in the ring.
3170          */
3171         memset(rz->addr, 0, RX_RING_SZ);
3172
3173         /*
3174          * Modified to setup VFRDT for Virtual Function
3175          */
3176         if (hw->mac.type == ixgbe_mac_82599_vf ||
3177             hw->mac.type == ixgbe_mac_X540_vf ||
3178             hw->mac.type == ixgbe_mac_X550_vf ||
3179             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3180             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3181                 rxq->rdt_reg_addr =
3182                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3183                 rxq->rdh_reg_addr =
3184                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3185         } else {
3186                 rxq->rdt_reg_addr =
3187                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3188                 rxq->rdh_reg_addr =
3189                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3190         }
3191
3192         rxq->rx_ring_phys_addr = rz->iova;
3193         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3194
3195         /*
3196          * Certain constraints must be met in order to use the bulk buffer
3197          * allocation Rx burst function. If any Rx queue fails to meet them,
3198          * the feature is disabled for the whole port.
3199          */
3200         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3201                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3202                                     "preconditions - canceling the feature for "
3203                                     "the whole port[%d]",
3204                              rxq->queue_id, rxq->port_id);
3205                 adapter->rx_bulk_alloc_allowed = false;
3206         }
3207
3208         /*
3209          * Allocate software ring. Allow for space at the end of the
3210          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3211          * function does not access an invalid memory region.
3212          */
3213         len = nb_desc;
3214         if (adapter->rx_bulk_alloc_allowed)
3215                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3216
3217         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3218                                           sizeof(struct ixgbe_rx_entry) * len,
3219                                           RTE_CACHE_LINE_SIZE, socket_id);
3220         if (!rxq->sw_ring) {
3221                 ixgbe_rx_queue_release(rxq);
3222                 return -ENOMEM;
3223         }
3224
3225         /*
3226          * Always allocate even if it's not going to be needed in order to
3227          * simplify the code.
3228          *
3229          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3230          * be requested in ixgbe_dev_rx_init(), which is called later from
3231          * dev_start() flow.
3232          */
3233         rxq->sw_sc_ring =
3234                 rte_zmalloc_socket("rxq->sw_sc_ring",
3235                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3236                                    RTE_CACHE_LINE_SIZE, socket_id);
3237         if (!rxq->sw_sc_ring) {
3238                 ixgbe_rx_queue_release(rxq);
3239                 return -ENOMEM;
3240         }
3241
3242         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3243                             "dma_addr=0x%"PRIx64,
3244                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3245                      rxq->rx_ring_phys_addr);
3246
3247         if (!rte_is_power_of_2(nb_desc)) {
3248                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3249                                     "preconditions - canceling the feature for "
3250                                     "the whole port[%d]",
3251                              rxq->queue_id, rxq->port_id);
3252                 adapter->rx_vec_allowed = false;
3253         } else
3254                 ixgbe_rxq_vec_setup(rxq);
3255
3256         dev->data->rx_queues[queue_idx] = rxq;
3257
3258         ixgbe_reset_rx_queue(adapter, rxq);
3259
3260         return 0;
3261 }
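/*
 * Illustrative sketch (not part of the driver): applications reach this
 * function through rte_eth_rx_queue_setup() with a pre-created mbuf pool,
 * e.g. (names are the caller's own):
 *
 *     struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192, 256,
 *                     0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *                     rte_eth_dev_socket_id(port_id), NULL, mp);
 *
 * A NULL rx_conf picks up the defaults from rte_eth_dev_info_get(). Note
 * that a non-power-of-2 nb_desc disables the vector Rx path for the whole
 * port, as logged above.
 */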
3262
3263 uint32_t
3264 ixgbe_dev_rx_queue_count(void *rx_queue)
3265 {
3266 #define IXGBE_RXQ_SCAN_INTERVAL 4
3267         volatile union ixgbe_adv_rx_desc *rxdp;
3268         struct ixgbe_rx_queue *rxq;
3269         uint32_t desc = 0;
3270
3271         rxq = rx_queue;
3272         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3273
3274         while ((desc < rxq->nb_rx_desc) &&
3275                 (rxdp->wb.upper.status_error &
3276                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3277                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3278                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3279                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3280                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3281                                 desc - rxq->nb_rx_desc]);
3282         }
3283
3284         return desc;
3285 }
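/*
 * Note that the scan above advances in steps of IXGBE_RXQ_SCAN_INTERVAL, so
 * the value reported through rte_eth_rx_queue_count() is an estimate rounded
 * to that granularity rather than an exact fill level. Illustrative use
 * ("high_watermark" and "apply_backpressure" are the caller's own):
 *
 *     int used = rte_eth_rx_queue_count(port_id, 0);
 *     if (used >= 0 && (unsigned int)used > high_watermark)
 *             apply_backpressure();
 */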
3286
3287 int
3288 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3289 {
3290         struct ixgbe_rx_queue *rxq = rx_queue;
3291         volatile uint32_t *status;
3292         uint32_t nb_hold, desc;
3293
3294         if (unlikely(offset >= rxq->nb_rx_desc))
3295                 return -EINVAL;
3296
3297 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3298         if (rxq->rx_using_sse)
3299                 nb_hold = rxq->rxrearm_nb;
3300         else
3301 #endif
3302                 nb_hold = rxq->nb_rx_hold;
3303         if (offset >= rxq->nb_rx_desc - nb_hold)
3304                 return RTE_ETH_RX_DESC_UNAVAIL;
3305
3306         desc = rxq->rx_tail + offset;
3307         if (desc >= rxq->nb_rx_desc)
3308                 desc -= rxq->nb_rx_desc;
3309
3310         status = &rxq->rx_ring[desc].wb.upper.status_error;
3311         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3312                 return RTE_ETH_RX_DESC_DONE;
3313
3314         return RTE_ETH_RX_DESC_AVAIL;
3315 }
3316
3317 int
3318 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3319 {
3320         struct ixgbe_tx_queue *txq = tx_queue;
3321         volatile uint32_t *status;
3322         uint32_t desc;
3323
3324         if (unlikely(offset >= txq->nb_tx_desc))
3325                 return -EINVAL;
3326
3327         desc = txq->tx_tail + offset;
3328         /* go to next desc that has the RS bit */
3329         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3330                 txq->tx_rs_thresh;
3331         if (desc >= txq->nb_tx_desc) {
3332                 desc -= txq->nb_tx_desc;
3333                 if (desc >= txq->nb_tx_desc)
3334                         desc -= txq->nb_tx_desc;
3335         }
3336
3337         status = &txq->tx_ring[desc].wb.status;
3338         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3339                 return RTE_ETH_TX_DESC_DONE;
3340
3341         return RTE_ETH_TX_DESC_FULL;
3342 }
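/*
 * Illustrative sketch (not part of the driver): both status helpers above
 * are exposed through the ethdev API and allow lightweight queue monitoring
 * without touching the rings directly ("port_id", "rx_head_ready" and
 * "tx_slot_busy" are assumptions of this example):
 *
 *     if (rte_eth_rx_descriptor_status(port_id, 0, 0) ==
 *                     RTE_ETH_RX_DESC_DONE)
 *             rx_head_ready = 1;
 *     if (rte_eth_tx_descriptor_status(port_id, 0, 0) ==
 *                     RTE_ETH_TX_DESC_FULL)
 *             tx_slot_busy = 1;
 */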
3343
3344 /*
3345  * Set up link loopback for X540/X550 mode Tx->Rx.
3346  */
3347 static inline void __rte_cold
3348 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3349 {
3350         uint32_t macc;
3351         PMD_INIT_FUNC_TRACE();
3352
3353         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3354
3355         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3356                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3357         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3358
3359         if (enable) {
3360                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3361                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3362                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3363                 macc |= IXGBE_MACC_FLU;
3364         } else {
3365                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3366                 macc &= ~IXGBE_MACC_FLU;
3367         }
3368
3369         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3370                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3371
3372         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3373 }
3374
3375 void __rte_cold
3376 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3377 {
3378         unsigned i;
3379         struct ixgbe_adapter *adapter = dev->data->dev_private;
3380         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3381
3382         PMD_INIT_FUNC_TRACE();
3383
3384         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3385                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3386
3387                 if (txq != NULL) {
3388                         txq->ops->release_mbufs(txq);
3389                         txq->ops->reset(txq);
3390                 }
3391         }
3392
3393         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3394                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3395
3396                 if (rxq != NULL) {
3397                         ixgbe_rx_queue_release_mbufs(rxq);
3398                         ixgbe_reset_rx_queue(adapter, rxq);
3399                 }
3400         }
3401         /* If loopback mode was enabled, reconfigure the link accordingly */
3402         if (dev->data->dev_conf.lpbk_mode != 0) {
3403                 if (hw->mac.type == ixgbe_mac_X540 ||
3404                      hw->mac.type == ixgbe_mac_X550 ||
3405                      hw->mac.type == ixgbe_mac_X550EM_x ||
3406                      hw->mac.type == ixgbe_mac_X550EM_a)
3407                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3408         }
3409 }
3410
3411 void
3412 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3413 {
3414         unsigned i;
3415
3416         PMD_INIT_FUNC_TRACE();
3417
3418         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3419                 ixgbe_dev_rx_queue_release(dev, i);
3420                 dev->data->rx_queues[i] = NULL;
3421         }
3422         dev->data->nb_rx_queues = 0;
3423
3424         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3425                 ixgbe_dev_tx_queue_release(dev, i);
3426                 dev->data->tx_queues[i] = NULL;
3427         }
3428         dev->data->nb_tx_queues = 0;
3429 }
3430
3431 /*********************************************************************
3432  *
3433  *  Device RX/TX init functions
3434  *
3435  **********************************************************************/
3436
3437 /**
3438  * Receive Side Scaling (RSS)
3439  * See section 7.1.2.8 in the following document:
3440  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3441  *
3442  * Principles:
3443  * The source and destination IP addresses of the IP header and the source
3444  * and destination ports of TCP/UDP headers, if any, of received packets are
3445  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3446  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3447  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3448  * RSS output index which is used as the RX queue index where to store the
3449  * received packets.
3450  * The following output is supplied in the RX write-back descriptor:
3451  *     - 32-bit result of the Microsoft RSS hash function,
3452  *     - 4-bit RSS type field.
3453  */
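/*
 * Worked example (illustrative only): for a packet whose 32-bit RSS hash is
 * 0x1234ABCD, the 7 LSBs are 0x4D = 77, so RETA entry 77 is consulted; if
 * that entry holds the value 2, the packet is steered to Rx queue 2 and the
 * full hash plus the RSS type are reported in the Rx write-back descriptor
 * (ending up in mbuf->hash.rss).
 */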
3454
3455 /*
3456  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3457  * Used as the default key.
3458  */
3459 static uint8_t rss_intel_key[40] = {
3460         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3461         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3462         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3463         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3464         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3465 };
3466
3467 static void
3468 ixgbe_rss_disable(struct rte_eth_dev *dev)
3469 {
3470         struct ixgbe_hw *hw;
3471         uint32_t mrqc;
3472         uint32_t mrqc_reg;
3473
3474         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3475         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3476         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3477         mrqc &= ~IXGBE_MRQC_RSSEN;
3478         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3479 }
3480
3481 static void
3482 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3483 {
3484         uint8_t  *hash_key;
3485         uint32_t mrqc;
3486         uint32_t rss_key;
3487         uint64_t rss_hf;
3488         uint16_t i;
3489         uint32_t mrqc_reg;
3490         uint32_t rssrk_reg;
3491
3492         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3493         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3494
3495         hash_key = rss_conf->rss_key;
3496         if (hash_key != NULL) {
3497                 /* Fill in RSS hash key */
3498                 for (i = 0; i < 10; i++) {
3499                         rss_key  = hash_key[(i * 4)];
3500                         rss_key |= hash_key[(i * 4) + 1] << 8;
3501                         rss_key |= hash_key[(i * 4) + 2] << 16;
3502                         rss_key |= hash_key[(i * 4) + 3] << 24;
3503                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3504                 }
3505         }
3506
3507         /* Set configured hashing protocols in MRQC register */
3508         rss_hf = rss_conf->rss_hf;
3509         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3510         if (rss_hf & RTE_ETH_RSS_IPV4)
3511                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3512         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
3513                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3514         if (rss_hf & RTE_ETH_RSS_IPV6)
3515                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3516         if (rss_hf & RTE_ETH_RSS_IPV6_EX)
3517                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3518         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
3519                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3520         if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
3521                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3522         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
3523                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3524         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
3525                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3526         if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
3527                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3528         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3529 }
3530
3531 int
3532 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3533                           struct rte_eth_rss_conf *rss_conf)
3534 {
3535         struct ixgbe_hw *hw;
3536         uint32_t mrqc;
3537         uint64_t rss_hf;
3538         uint32_t mrqc_reg;
3539
3540         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3541
3542         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3543                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3544                         "NIC.");
3545                 return -ENOTSUP;
3546         }
3547         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3548
3549         /*
3550          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3551          *     "RSS enabling cannot be done dynamically while it must be
3552          *      preceded by a software reset"
3553          * Before changing anything, first check that the update RSS operation
3554          * does not attempt to disable RSS, if RSS was enabled at
3555          * initialization time, or does not attempt to enable RSS, if RSS was
3556          * disabled at initialization time.
3557          */
3558         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3559         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3560         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3561                 if (rss_hf != 0) /* Enable RSS */
3562                         return -(EINVAL);
3563                 return 0; /* Nothing to do */
3564         }
3565         /* RSS enabled */
3566         if (rss_hf == 0) /* Disable RSS */
3567                 return -(EINVAL);
3568         ixgbe_hw_rss_hash_set(hw, rss_conf);
3569         return 0;
3570 }
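/*
 * Illustrative sketch (not part of the driver): given the restriction above
 * (RSS cannot be toggled at run time), a typical caller only narrows or
 * widens the hash functions on an already RSS-enabled port. A NULL rss_key
 * keeps the currently programmed key:
 *
 *     struct rte_eth_rss_conf rss_conf = {
 *             .rss_key = NULL,
 *             .rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 *
 * The driver rejects (-EINVAL) an update that would effectively enable or
 * disable RSS relative to the configuration chosen at initialization time.
 */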
3571
3572 int
3573 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3574                             struct rte_eth_rss_conf *rss_conf)
3575 {
3576         struct ixgbe_hw *hw;
3577         uint8_t *hash_key;
3578         uint32_t mrqc;
3579         uint32_t rss_key;
3580         uint64_t rss_hf;
3581         uint16_t i;
3582         uint32_t mrqc_reg;
3583         uint32_t rssrk_reg;
3584
3585         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3586         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3587         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3588         hash_key = rss_conf->rss_key;
3589         if (hash_key != NULL) {
3590                 /* Return RSS hash key */
3591                 for (i = 0; i < 10; i++) {
3592                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3593                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3594                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3595                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3596                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3597                 }
3598         }
3599
3600         /* Get RSS functions configured in MRQC register */
3601         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3602         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3603                 rss_conf->rss_hf = 0;
3604                 return 0;
3605         }
3606         rss_hf = 0;
3607         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3608                 rss_hf |= RTE_ETH_RSS_IPV4;
3609         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3610                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
3611         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3612                 rss_hf |= RTE_ETH_RSS_IPV6;
3613         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3614                 rss_hf |= RTE_ETH_RSS_IPV6_EX;
3615         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3616                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
3617         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3618                 rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX;
3619         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3620                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
3621         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3622                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
3623         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3624                 rss_hf |= RTE_ETH_RSS_IPV6_UDP_EX;
3625         rss_conf->rss_hf = rss_hf;
3626         return 0;
3627 }
3628
3629 static void
3630 ixgbe_rss_configure(struct rte_eth_dev *dev)
3631 {
3632         struct rte_eth_rss_conf rss_conf;
3633         struct ixgbe_adapter *adapter;
3634         struct ixgbe_hw *hw;
3635         uint32_t reta;
3636         uint16_t i;
3637         uint16_t j;
3638         uint16_t sp_reta_size;
3639         uint32_t reta_reg;
3640
3641         PMD_INIT_FUNC_TRACE();
3642         adapter = dev->data->dev_private;
3643         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3644
3645         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3646
3647         /*
3648          * Fill in redirection table
3649          * The byte-swap is needed because NIC registers are in
3650          * little-endian order.
3651          */
3652         if (adapter->rss_reta_updated == 0) {
3653                 reta = 0;
3654                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3655                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3656
3657                         if (j == dev->data->nb_rx_queues)
3658                                 j = 0;
3659                         reta = (reta << 8) | j;
3660                         if ((i & 3) == 3)
3661                                 IXGBE_WRITE_REG(hw, reta_reg,
3662                                                 rte_bswap32(reta));
3663                 }
3664         }
3665
3666         /*
3667          * Configure the RSS key and the RSS protocols used to compute
3668          * the RSS hash of input packets.
3669          */
3670         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3671         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3672                 ixgbe_rss_disable(dev);
3673                 return;
3674         }
3675         if (rss_conf.rss_key == NULL)
3676                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3677         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3678 }
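/*
 * Worked example of the RETA fill above (illustrative only): with 4 Rx
 * queues the 128 entries cycle 0, 1, 2, 3, 0, 1, ... and every fourth
 * iteration one 32-bit RETA register is written. For the first register the
 * packed value is 0x00010203, which rte_bswap32() turns into 0x03020100 so
 * that the little-endian hardware sees entry 0 = queue 0, entry 1 = queue 1,
 * entry 2 = queue 2 and entry 3 = queue 3.
 */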
3679
3680 #define NUM_VFTA_REGISTERS 128
3681 #define NIC_RX_BUFFER_SIZE 0x200
3682 #define X550_RX_BUFFER_SIZE 0x180
3683
3684 static void
3685 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3686 {
3687         struct rte_eth_vmdq_dcb_conf *cfg;
3688         struct ixgbe_hw *hw;
3689         enum rte_eth_nb_pools num_pools;
3690         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3691         uint16_t pbsize;
3692         uint8_t nb_tcs; /* number of traffic classes */
3693         int i;
3694
3695         PMD_INIT_FUNC_TRACE();
3696         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3697         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3698         num_pools = cfg->nb_queue_pools;
3699         /* Check we have a valid number of pools */
3700         if (num_pools != RTE_ETH_16_POOLS && num_pools != RTE_ETH_32_POOLS) {
3701                 ixgbe_rss_disable(dev);
3702                 return;
3703         }
3704         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3705         nb_tcs = (uint8_t)(RTE_ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3706
3707         /*
3708          * RXPBSIZE
3709          * split rx buffer up into sections, each for 1 traffic class
3710          */
3711         switch (hw->mac.type) {
3712         case ixgbe_mac_X550:
3713         case ixgbe_mac_X550EM_x:
3714         case ixgbe_mac_X550EM_a:
3715                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3716                 break;
3717         default:
3718                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3719                 break;
3720         }
3721         for (i = 0; i < nb_tcs; i++) {
3722                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3723
3724                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3725                 /* clear 10 bits. */
3726                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3727                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3728         }
3729         /* zero alloc all unused TCs */
3730         for (i = nb_tcs; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3731                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3732
3733                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3734                 /* clear 10 bits. */
3735                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3736         }
3737
3738         /* MRQC: enable vmdq and dcb */
3739         mrqc = (num_pools == RTE_ETH_16_POOLS) ?
3740                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3741         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3742
3743         /* PFVTCTL: turn on virtualisation and set the default pool */
3744         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3745         if (cfg->enable_default_pool) {
3746                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3747         } else {
3748                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3749         }
3750
3751         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3752
3753         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3754         queue_mapping = 0;
3755         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
3756                 /*
3757                  * mapping is done with 3 bits per priority,
3758                  * so shift by i*3 each time
3759                  */
3760                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3761
3762         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3763
3764         /* RTRPCS: DCB related */
3765         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3766
3767         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3768         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3769         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3770         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3771
3772         /* VFTA - enable all vlan filters */
3773         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3774                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3775         }
3776
3777         /* VFRE: pool enabling for receive - 16 or 32 */
3778         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3779                         num_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3780
3781         /*
3782          * MPSAR - allow pools to read specific mac addresses
3783          * In this case, all pools should be able to read from mac addr 0
3784          */
3785         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3786         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3787
3788         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3789         for (i = 0; i < cfg->nb_pool_maps; i++) {
3790                 /* set vlan id in VF register and set the valid bit */
3791                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3792                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3793                 /*
3794                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3795                  * pools, we only need to use the first half of the register
3796                  * i.e. bits 0-31
3797                  */
3798                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3799         }
3800 }
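/*
 * Worked example of the packet-buffer split above (illustrative only): with
 * 16 pools the device runs 8 traffic classes, so on non-X550 MACs each TC
 * gets NIC_RX_BUFFER_SIZE / 8 = 0x200 / 8 = 0x40 in RXPBSIZE units, and the
 * "zero alloc" loop has nothing left to clear. With 32 pools and 4 TCs each
 * class gets 0x80 and TCs 4-7 are zeroed instead.
 */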
3801
3802 /**
3803  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3804  * @dev: pointer to eth_dev structure
3805  * @dcb_config: pointer to ixgbe_dcb_config structure
3806  */
3807 static void
3808 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3809                        struct ixgbe_dcb_config *dcb_config)
3810 {
3811         uint32_t reg;
3812         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3813
3814         PMD_INIT_FUNC_TRACE();
3815         if (hw->mac.type != ixgbe_mac_82598EB) {
3816                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3817                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3818                 reg |= IXGBE_RTTDCS_ARBDIS;
3819                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3820
3821                 /* Enable DCB for Tx with 8 TCs */
3822                 if (dcb_config->num_tcs.pg_tcs == 8) {
3823                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3824                 } else {
3825                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3826                 }
3827                 if (dcb_config->vt_mode)
3828                         reg |= IXGBE_MTQC_VT_ENA;
3829                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3830
3831                 /* Enable the Tx desc arbiter */
3832                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3833                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3834                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3835
3836                 /* Enable Security TX Buffer IFG for DCB */
3837                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3838                 reg |= IXGBE_SECTX_DCB;
3839                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3840         }
3841 }
3842
3843 /**
3844  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3845  * @dev: pointer to rte_eth_dev structure
3846  * @dcb_config: pointer to ixgbe_dcb_config structure
3847  */
3848 static void
3849 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3850                         struct ixgbe_dcb_config *dcb_config)
3851 {
3852         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3853                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3854         struct ixgbe_hw *hw =
3855                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3856
3857         PMD_INIT_FUNC_TRACE();
3858         if (hw->mac.type != ixgbe_mac_82598EB)
3859                 /*PF VF Transmit Enable*/
3860                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3861                         vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3862
3863         /*Configure general DCB TX parameters*/
3864         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3865 }
3866
3867 static void
3868 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3869                         struct ixgbe_dcb_config *dcb_config)
3870 {
3871         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3872                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3873         struct ixgbe_dcb_tc_config *tc;
3874         uint8_t i, j;
3875
3876         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3877         if (vmdq_rx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3878                 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3879                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3880         } else {
3881                 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3882                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3883         }
3884
3885         /* Initialize User Priority to Traffic Class mapping */
3886         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3887                 tc = &dcb_config->tc_config[j];
3888                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3889         }
3890
3891         /* User Priority to Traffic Class mapping */
3892         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3893                 j = vmdq_rx_conf->dcb_tc[i];
3894                 tc = &dcb_config->tc_config[j];
3895                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3896                                                 (uint8_t)(1 << i);
3897         }
3898 }
3899
3900 static void
3901 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3902                         struct ixgbe_dcb_config *dcb_config)
3903 {
3904         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3905                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3906         struct ixgbe_dcb_tc_config *tc;
3907         uint8_t i, j;
3908
3909         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3910         if (vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3911                 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3912                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3913         } else {
3914                 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3915                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3916         }
3917
3918         /* Initialize User Priority to Traffic Class mapping */
3919         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3920                 tc = &dcb_config->tc_config[j];
3921                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3922         }
3923
3924         /* User Priority to Traffic Class mapping */
3925         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3926                 j = vmdq_tx_conf->dcb_tc[i];
3927                 tc = &dcb_config->tc_config[j];
3928                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3929                                                 (uint8_t)(1 << i);
3930         }
3931 }
3932
3933 static void
3934 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3935                 struct ixgbe_dcb_config *dcb_config)
3936 {
3937         struct rte_eth_dcb_rx_conf *rx_conf =
3938                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3939         struct ixgbe_dcb_tc_config *tc;
3940         uint8_t i, j;
3941
3942         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3943         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3944
3945         /* Initialize User Priority to Traffic Class mapping */
3946         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3947                 tc = &dcb_config->tc_config[j];
3948                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3949         }
3950
3951         /* User Priority to Traffic Class mapping */
3952         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3953                 j = rx_conf->dcb_tc[i];
3954                 tc = &dcb_config->tc_config[j];
3955                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3956                                                 (uint8_t)(1 << i);
3957         }
3958 }
3959
3960 static void
3961 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3962                 struct ixgbe_dcb_config *dcb_config)
3963 {
3964         struct rte_eth_dcb_tx_conf *tx_conf =
3965                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3966         struct ixgbe_dcb_tc_config *tc;
3967         uint8_t i, j;
3968
3969         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3970         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3971
3972         /* Initialize User Priority to Traffic Class mapping */
3973         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3974                 tc = &dcb_config->tc_config[j];
3975                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3976         }
3977
3978         /* User Priority to Traffic Class mapping */
3979         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3980                 j = tx_conf->dcb_tc[i];
3981                 tc = &dcb_config->tc_config[j];
3982                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3983                                                 (uint8_t)(1 << i);
3984         }
3985 }
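
/*
 * Example (illustrative) for the four helpers above: with
 * dcb_tc = {0, 0, 0, 0, 1, 1, 1, 1}, tc_config[0].up_to_tc_bitmap becomes
 * 0x0F (user priorities 0-3), tc_config[1].up_to_tc_bitmap becomes 0xF0
 * (user priorities 4-7), and the remaining TCs keep a zero bitmap.
 */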
3986
3987 /**
3988  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3989  * @dev: pointer to eth_dev structure
3990  * @dcb_config: pointer to ixgbe_dcb_config structure
3991  */
3992 static void
3993 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3994                        struct ixgbe_dcb_config *dcb_config)
3995 {
3996         uint32_t reg;
3997         uint32_t vlanctrl;
3998         uint8_t i;
3999         uint32_t q;
4000         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4001
4002         PMD_INIT_FUNC_TRACE();
4003         /*
4004          * Disable the arbiter before changing parameters
4005          * (always enable recycle mode; WSP)
4006          */
4007         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4008         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4009
4010         if (hw->mac.type != ixgbe_mac_82598EB) {
4011                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
4012                 if (dcb_config->num_tcs.pg_tcs == 4) {
4013                         if (dcb_config->vt_mode)
4014                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4015                                         IXGBE_MRQC_VMDQRT4TCEN;
4016                         else {
4017                                 /* whether the mode is DCB or DCB_RSS, set
4018                                  * MRQE to RTRSSxTCEN; RSS itself is
4019                                  * controlled by the RSS_FIELD bits
4020                                  */
4021                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4022                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4023                                         IXGBE_MRQC_RTRSS4TCEN;
4024                         }
4025                 }
4026                 if (dcb_config->num_tcs.pg_tcs == 8) {
4027                         if (dcb_config->vt_mode)
4028                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4029                                         IXGBE_MRQC_VMDQRT8TCEN;
4030                         else {
4031                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4032                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4033                                         IXGBE_MRQC_RTRSS8TCEN;
4034                         }
4035                 }
4036
4037                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4038
4039                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4040                         /* Disable drop for all queues in VMDQ mode*/
4041                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4042                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4043                                                 (IXGBE_QDE_WRITE |
4044                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4045                 } else {
4046                         /* Enable drop for all queues in SRIOV mode */
4047                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4048                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4049                                                 (IXGBE_QDE_WRITE |
4050                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4051                                                  IXGBE_QDE_ENABLE));
4052                 }
4053         }
4054
4055         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4056         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4057         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4058         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4059
4060         /* VFTA - enable all vlan filters */
4061         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4062                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4063         }
4064
4065         /*
4066          * Configure Rx packet plane (recycle mode; WSP) and
4067          * enable arbiter
4068          */
4069         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4070         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4071 }
4072
4073 static void
4074 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4075                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4076 {
4077         switch (hw->mac.type) {
4078         case ixgbe_mac_82598EB:
4079                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4080                 break;
4081         case ixgbe_mac_82599EB:
4082         case ixgbe_mac_X540:
4083         case ixgbe_mac_X550:
4084         case ixgbe_mac_X550EM_x:
4085         case ixgbe_mac_X550EM_a:
4086                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4087                                                   tsa, map);
4088                 break;
4089         default:
4090                 break;
4091         }
4092 }
4093
4094 static void
4095 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4096                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4097 {
4098         switch (hw->mac.type) {
4099         case ixgbe_mac_82598EB:
4100                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4101                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4102                 break;
4103         case ixgbe_mac_82599EB:
4104         case ixgbe_mac_X540:
4105         case ixgbe_mac_X550:
4106         case ixgbe_mac_X550EM_x:
4107         case ixgbe_mac_X550EM_a:
4108                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4109                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4110                 break;
4111         default:
4112                 break;
4113         }
4114 }
4115
4116 #define DCB_RX_CONFIG  1
4117 #define DCB_TX_CONFIG  1
4118 #define DCB_TX_PB      1024
4119 /**
4120  * ixgbe_dcb_hw_configure - Enable DCB and configure
4121  * general DCB in VT mode and non-VT mode parameters
4122  * @dev: pointer to rte_eth_dev structure
4123  * @dcb_config: pointer to ixgbe_dcb_config structure
4124  */
4125 static int
4126 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4127                         struct ixgbe_dcb_config *dcb_config)
4128 {
4129         int     ret = 0;
4130         uint8_t i, pfc_en, nb_tcs;
4131         uint16_t pbsize, rx_buffer_size;
4132         uint8_t config_dcb_rx = 0;
4133         uint8_t config_dcb_tx = 0;
4134         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4135         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4136         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4137         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4138         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4139         struct ixgbe_dcb_tc_config *tc;
4140         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4141                 RTE_ETHER_CRC_LEN;
4142         struct ixgbe_hw *hw =
4143                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4144         struct ixgbe_bw_conf *bw_conf =
4145                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4146
4147         switch (dev->data->dev_conf.rxmode.mq_mode) {
4148         case RTE_ETH_MQ_RX_VMDQ_DCB:
4149                 dcb_config->vt_mode = true;
4150                 if (hw->mac.type != ixgbe_mac_82598EB) {
4151                         config_dcb_rx = DCB_RX_CONFIG;
4152                         /*
4153                          *get dcb and VT rx configuration parameters
4154                          *from rte_eth_conf
4155                          */
4156                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4157                         /*Configure general VMDQ and DCB RX parameters*/
4158                         ixgbe_vmdq_dcb_configure(dev);
4159                 }
4160                 break;
4161         case RTE_ETH_MQ_RX_DCB:
4162         case RTE_ETH_MQ_RX_DCB_RSS:
4163                 dcb_config->vt_mode = false;
4164                 config_dcb_rx = DCB_RX_CONFIG;
4165                 /* Get DCB RX configuration parameters from rte_eth_conf */
4166                 ixgbe_dcb_rx_config(dev, dcb_config);
4167                 /*Configure general DCB RX parameters*/
4168                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4169                 break;
4170         default:
4171                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4172                 break;
4173         }
4174         switch (dev->data->dev_conf.txmode.mq_mode) {
4175         case RTE_ETH_MQ_TX_VMDQ_DCB:
4176                 dcb_config->vt_mode = true;
4177                 config_dcb_tx = DCB_TX_CONFIG;
4178                 /* get DCB and VT TX configuration parameters
4179                  * from rte_eth_conf
4180                  */
4181                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4182                 /*Configure general VMDQ and DCB TX parameters*/
4183                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4184                 break;
4185
4186         case RTE_ETH_MQ_TX_DCB:
4187                 dcb_config->vt_mode = false;
4188                 config_dcb_tx = DCB_TX_CONFIG;
4189                 /*get DCB TX configuration parameters from rte_eth_conf*/
4190                 ixgbe_dcb_tx_config(dev, dcb_config);
4191                 /*Configure general DCB TX parameters*/
4192                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4193                 break;
4194         default:
4195                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4196                 break;
4197         }
4198
4199         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4200         /* Unpack map */
4201         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4202         if (nb_tcs == RTE_ETH_4_TCS) {
4203                 /* Avoid un-configured priority mapping to TC0 */
4204                 uint8_t j = 4;
4205                 uint8_t mask = 0xFF;
4206
4207                 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4208                         mask = (uint8_t)(mask & (~(1 << map[i])));
4209                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4210                         if ((mask & 0x1) && j < RTE_ETH_DCB_NUM_USER_PRIORITIES)
4211                                 map[j++] = i;
4212                         mask >>= 1;
4213                 }
4214                 /* Re-configure 4 TCs BW */
4215                 for (i = 0; i < nb_tcs; i++) {
4216                         tc = &dcb_config->tc_config[i];
4217                         if (bw_conf->tc_num != nb_tcs)
4218                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4219                                         (uint8_t)(100 / nb_tcs);
4220                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4221                                                 (uint8_t)(100 / nb_tcs);
4222                 }
4223                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4224                         tc = &dcb_config->tc_config[i];
4225                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4226                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4227                 }
4228         } else {
4229                 /* Re-configure 8 TCs BW */
4230                 for (i = 0; i < nb_tcs; i++) {
4231                         tc = &dcb_config->tc_config[i];
4232                         if (bw_conf->tc_num != nb_tcs)
4233                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4234                                         (uint8_t)(100 / nb_tcs + (i & 1));
4235                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4236                                 (uint8_t)(100 / nb_tcs + (i & 1));
4237                 }
4238         }
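        /*
         * Note: with 8 TCs, 100 / 8 = 12, so the (i & 1) term above gives
         * odd TCs 13% and even TCs 12%, i.e. 4 * 12 + 4 * 13 = 100 in total.
         */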
4239
4240         switch (hw->mac.type) {
4241         case ixgbe_mac_X550:
4242         case ixgbe_mac_X550EM_x:
4243         case ixgbe_mac_X550EM_a:
4244                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4245                 break;
4246         default:
4247                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4248                 break;
4249         }
4250
4251         if (config_dcb_rx) {
4252                 /* Set RX buffer size */
4253                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4254                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4255
4256                 for (i = 0; i < nb_tcs; i++) {
4257                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4258                 }
4259                 /* zero alloc all unused TCs */
4260                 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
4261                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4262         }
4263         if (config_dcb_tx) {
4264                 /* Only an equally distributed Tx packet
4265                  * buffer strategy is supported.
4266                  */
4267                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4268                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4269
4270                 for (i = 0; i < nb_tcs; i++) {
4271                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4272                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4273                 }
4274                 /* Clear unused TCs, if any, to zero buffer size*/
4275                 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4276                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4277                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4278                 }
4279         }
4280
4281         /*Calculates traffic class credits*/
4282         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4283                                 IXGBE_DCB_TX_CONFIG);
4284         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4285                                 IXGBE_DCB_RX_CONFIG);
4286
4287         if (config_dcb_rx) {
4288                 /* Unpack CEE standard containers */
4289                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4290                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4291                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4292                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4293                 /* Configure PG(ETS) RX */
4294                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4295         }
4296
4297         if (config_dcb_tx) {
4298                 /* Unpack CEE standard containers */
4299                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4300                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4301                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4302                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4303                 /* Configure PG(ETS) TX */
4304                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4305         }
4306
4307         /*Configure queue statistics registers*/
4308         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4309
4310         /* Check if the PFC is supported */
4311         if (dev->data->dev_conf.dcb_capability_en & RTE_ETH_DCB_PFC_SUPPORT) {
4312                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4313                 for (i = 0; i < nb_tcs; i++) {
4314                         /*
4315                          * e.g. with 8 TCs and the default 512 KB RX buffer, pbsize = 64,
4316                          * so high_water defaults to (64 * 3) / 4 = 48 and low_water to 16.
4317                          */
4318                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4319                         hw->fc.low_water[i] = pbsize / 4;
4320                         /* Enable pfc for this TC */
4321                         tc = &dcb_config->tc_config[i];
4322                         tc->pfc = ixgbe_dcb_pfc_enabled;
4323                 }
4324                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4325                 if (dcb_config->num_tcs.pfc_tcs == RTE_ETH_4_TCS)
4326                         pfc_en &= 0x0F;
4327                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4328         }
4329
4330         return ret;
4331 }
4332
4333 /**
4334  * ixgbe_configure_dcb - Configure DCB  Hardware
4335  * @dev: pointer to rte_eth_dev
4336  */
4337 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4338 {
4339         struct ixgbe_dcb_config *dcb_cfg =
4340                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4341         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4342
4343         PMD_INIT_FUNC_TRACE();
4344
4345         /* check support mq_mode for DCB */
4346         if (dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_VMDQ_DCB &&
4347             dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB &&
4348             dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB_RSS)
4349                 return;
4350
4351         if (dev->data->nb_rx_queues > RTE_ETH_DCB_NUM_QUEUES)
4352                 return;
4353
4354         /** Configure DCB hardware **/
4355         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4356 }
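
/*
 * Illustrative sketch only (not called by the driver): a minimal
 * rte_eth_conf setup, assuming a 4-TC configuration, that makes
 * ixgbe_configure_dcb() program plain DCB with PFC enabled. Values are
 * example assumptions, not requirements.
 */
static __rte_unused void
ixgbe_example_dcb_pfc_conf(struct rte_eth_conf *conf)
{
        struct rte_eth_dcb_rx_conf *rx = &conf->rx_adv_conf.dcb_rx_conf;
        struct rte_eth_dcb_tx_conf *tx = &conf->tx_adv_conf.dcb_tx_conf;
        unsigned int up;

        conf->rxmode.mq_mode = RTE_ETH_MQ_RX_DCB;
        conf->txmode.mq_mode = RTE_ETH_MQ_TX_DCB;
        conf->dcb_capability_en = RTE_ETH_DCB_PFC_SUPPORT;
        rx->nb_tcs = RTE_ETH_4_TCS;
        tx->nb_tcs = RTE_ETH_4_TCS;
        for (up = 0; up < RTE_ETH_DCB_NUM_USER_PRIORITIES; up++) {
                rx->dcb_tc[up] = up % RTE_ETH_4_TCS; /* UP -> TC round-robin */
                tx->dcb_tc[up] = up % RTE_ETH_4_TCS;
        }
}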
4357
4358 /*
4359  * VMDq is only supported on 10 GbE NICs.
4360  */
4361 static void
4362 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4363 {
4364         struct rte_eth_vmdq_rx_conf *cfg;
4365         struct ixgbe_hw *hw;
4366         enum rte_eth_nb_pools num_pools;
4367         uint32_t mrqc, vt_ctl, vlanctrl;
4368         uint32_t vmolr = 0;
4369         int i;
4370
4371         PMD_INIT_FUNC_TRACE();
4372         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4373         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4374         num_pools = cfg->nb_queue_pools;
4375
4376         ixgbe_rss_disable(dev);
4377
4378         /* MRQC: enable vmdq */
4379         mrqc = IXGBE_MRQC_VMDQEN;
4380         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4381
4382         /* PFVTCTL: turn on virtualisation and set the default pool */
4383         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4384         if (cfg->enable_default_pool)
4385                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4386         else
4387                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4388
4389         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4390
4391         for (i = 0; i < (int)num_pools; i++) {
4392                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4393                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4394         }
4395
4396         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4397         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4398         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4399         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4400
4401         /* VFTA - enable all vlan filters */
4402         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4403                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4404
4405         /* VFRE: pool enabling for receive - 64 */
4406         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4407         if (num_pools == RTE_ETH_64_POOLS)
4408                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4409
4410         /*
4411          * MPSAR - allow pools to read specific mac addresses
4412          * In this case, all pools should be able to read from mac addr 0
4413          */
4414         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4415         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4416
4417         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4418         for (i = 0; i < cfg->nb_pool_maps; i++) {
4419                 /* set vlan id in VF register and set the valid bit */
4420                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4421                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4422                 /*
4423                  * Put the allowed pools in the VLVFB registers. The 64-bit pool
4424                  * bitmap is split across a pair of 32-bit registers: the lower
4425                  * one covers pools 0-31, the upper one pools 32-63.
4426                  */
4427                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4428                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4429                                         (cfg->pool_map[i].pools & UINT32_MAX));
4430                 else
4431                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4432                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4433
4434         }
4435
4436         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4437         if (cfg->enable_loop_back) {
4438                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4439                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4440                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4441         }
4442
4443         IXGBE_WRITE_FLUSH(hw);
4444 }
4445
4446 /*
4447  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4448  * @hw: pointer to hardware structure
4449  */
4450 static void
4451 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4452 {
4453         uint32_t reg;
4454         uint32_t q;
4455
4456         PMD_INIT_FUNC_TRACE();
4457         /*PF VF Transmit Enable*/
4458         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4459         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4460
4461         /* Disable the Tx desc arbiter so that MTQC can be changed */
4462         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4463         reg |= IXGBE_RTTDCS_ARBDIS;
4464         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4465
4466         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4467         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4468
4469         /* Disable drop for all queues */
4470         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4471                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4472                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4473
4474         /* Enable the Tx desc arbiter */
4475         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4476         reg &= ~IXGBE_RTTDCS_ARBDIS;
4477         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4478
4479         IXGBE_WRITE_FLUSH(hw);
4480 }
4481
4482 static int __rte_cold
4483 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4484 {
4485         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4486         uint64_t dma_addr;
4487         unsigned int i;
4488
4489         /* Initialize software ring entries */
4490         for (i = 0; i < rxq->nb_rx_desc; i++) {
4491                 volatile union ixgbe_adv_rx_desc *rxd;
4492                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4493
4494                 if (mbuf == NULL) {
4495                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4496                                      (unsigned) rxq->queue_id);
4497                         return -ENOMEM;
4498                 }
4499
4500                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4501                 mbuf->port = rxq->port_id;
4502
4503                 dma_addr =
4504                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4505                 rxd = &rxq->rx_ring[i];
4506                 rxd->read.hdr_addr = 0;
4507                 rxd->read.pkt_addr = dma_addr;
4508                 rxe[i].mbuf = mbuf;
4509         }
4510
4511         return 0;
4512 }
4513
4514 static int
4515 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4516 {
4517         struct ixgbe_hw *hw;
4518         uint32_t mrqc;
4519
4520         ixgbe_rss_configure(dev);
4521
4522         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4523
4524         /* MRQC: enable VF RSS */
4525         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4526         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4527         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4528         case RTE_ETH_64_POOLS:
4529                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4530                 break;
4531
4532         case RTE_ETH_32_POOLS:
4533                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4534                 break;
4535
4536         default:
4537                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4538                 return -EINVAL;
4539         }
4540
4541         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4542
4543         return 0;
4544 }
4545
4546 static int
4547 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4548 {
4549         struct ixgbe_hw *hw =
4550                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4551
4552         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4553         case RTE_ETH_64_POOLS:
4554                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4555                         IXGBE_MRQC_VMDQEN);
4556                 break;
4557
4558         case RTE_ETH_32_POOLS:
4559                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4560                         IXGBE_MRQC_VMDQRT4TCEN);
4561                 break;
4562
4563         case RTE_ETH_16_POOLS:
4564                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4565                         IXGBE_MRQC_VMDQRT8TCEN);
4566                 break;
4567         default:
4568                 PMD_INIT_LOG(ERR,
4569                         "invalid pool number in IOV mode");
4570                 break;
4571         }
4572         return 0;
4573 }
4574
4575 static int
4576 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4577 {
4578         struct ixgbe_hw *hw =
4579                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4580
4581         if (hw->mac.type == ixgbe_mac_82598EB)
4582                 return 0;
4583
4584         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4585                 /*
4586                  * SRIOV inactive scheme
4587                  * any DCB/RSS w/o VMDq multi-queue setting
4588                  */
4589                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4590                 case RTE_ETH_MQ_RX_RSS:
4591                 case RTE_ETH_MQ_RX_DCB_RSS:
4592                 case RTE_ETH_MQ_RX_VMDQ_RSS:
4593                         ixgbe_rss_configure(dev);
4594                         break;
4595
4596                 case RTE_ETH_MQ_RX_VMDQ_DCB:
4597                         ixgbe_vmdq_dcb_configure(dev);
4598                         break;
4599
4600                 case RTE_ETH_MQ_RX_VMDQ_ONLY:
4601                         ixgbe_vmdq_rx_hw_configure(dev);
4602                         break;
4603
4604                 case RTE_ETH_MQ_RX_NONE:
4605                 default:
4606                         /* if mq_mode is none, disable rss mode.*/
4607                         ixgbe_rss_disable(dev);
4608                         break;
4609                 }
4610         } else {
4611                 /* SRIOV active scheme
4612                  * Support RSS together with SRIOV.
4613                  */
4614                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4615                 case RTE_ETH_MQ_RX_RSS:
4616                 case RTE_ETH_MQ_RX_VMDQ_RSS:
4617                         ixgbe_config_vf_rss(dev);
4618                         break;
4619                 case RTE_ETH_MQ_RX_VMDQ_DCB:
4620                 case RTE_ETH_MQ_RX_DCB:
4621                 /* In SRIOV, the configuration is the same as VMDq case */
4622                         ixgbe_vmdq_dcb_configure(dev);
4623                         break;
4624                 /* DCB/RSS together with SRIOV is not supported */
4625                 case RTE_ETH_MQ_RX_VMDQ_DCB_RSS:
4626                 case RTE_ETH_MQ_RX_DCB_RSS:
4627                         PMD_INIT_LOG(ERR,
4628                                 "DCB/RSS is not supported with VMDq & SRIOV");
4629                         return -1;
4630                 default:
4631                         ixgbe_config_vf_default(dev);
4632                         break;
4633                 }
4634         }
4635
4636         return 0;
4637 }
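
/*
 * Summary of the RX multi-queue dispatch above:
 *   SR-IOV inactive: RSS/DCB_RSS/VMDQ_RSS -> ixgbe_rss_configure(),
 *                    VMDQ_DCB -> ixgbe_vmdq_dcb_configure(),
 *                    VMDQ_ONLY -> ixgbe_vmdq_rx_hw_configure(),
 *                    anything else disables RSS.
 *   SR-IOV active:   RSS/VMDQ_RSS -> ixgbe_config_vf_rss(),
 *                    VMDQ_DCB/DCB -> ixgbe_vmdq_dcb_configure(),
 *                    DCB+RSS combinations are rejected,
 *                    anything else -> ixgbe_config_vf_default().
 */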
4638
4639 static int
4640 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4641 {
4642         struct ixgbe_hw *hw =
4643                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4644         uint32_t mtqc;
4645         uint32_t rttdcs;
4646
4647         if (hw->mac.type == ixgbe_mac_82598EB)
4648                 return 0;
4649
4650         /* disable arbiter before setting MTQC */
4651         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4652         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4653         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4654
4655         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4656                 /*
4657                  * SRIOV inactive scheme
4658                  * any DCB w/o VMDq multi-queue setting
4659                  */
4660                 if (dev->data->dev_conf.txmode.mq_mode == RTE_ETH_MQ_TX_VMDQ_ONLY)
4661                         ixgbe_vmdq_tx_hw_configure(hw);
4662                 else {
4663                         mtqc = IXGBE_MTQC_64Q_1PB;
4664                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4665                 }
4666         } else {
4667                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4668
4669                 /*
4670                  * SRIOV active scheme
4671                  * FIXME if support DCB together with VMDq & SRIOV
4672                  */
4673                 case RTE_ETH_64_POOLS:
4674                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4675                         break;
4676                 case RTE_ETH_32_POOLS:
4677                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4678                         break;
4679                 case RTE_ETH_16_POOLS:
4680                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4681                                 IXGBE_MTQC_8TC_8TQ;
4682                         break;
4683                 default:
4684                         mtqc = IXGBE_MTQC_64Q_1PB;
4685                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4686                 }
4687                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4688         }
4689
4690         /* re-enable arbiter */
4691         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4692         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4693
4694         return 0;
4695 }
4696
4697 /**
4698  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4699  *
4700  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4701  * spec rev. 3.0 chapter 8.2.3.8.13.
4702  *
4703  * @pool Memory pool of the Rx queue
4704  */
4705 static inline uint32_t
4706 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4707 {
4708         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4709
4710         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4711         uint16_t maxdesc =
4712                 RTE_IPV4_MAX_PKT_LEN /
4713                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4714
4715         if (maxdesc >= 16)
4716                 return IXGBE_RSCCTL_MAXDESC_16;
4717         else if (maxdesc >= 8)
4718                 return IXGBE_RSCCTL_MAXDESC_8;
4719         else if (maxdesc >= 4)
4720                 return IXGBE_RSCCTL_MAXDESC_4;
4721         else
4722                 return IXGBE_RSCCTL_MAXDESC_1;
4723 }
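
/*
 * Example (illustrative): assuming the common 2048-byte mbuf data room left
 * after the headroom, maxdesc = 65535 / 2048 = 31, so the function returns
 * IXGBE_RSCCTL_MAXDESC_16.
 */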
4724
4725 /**
4726  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4727  * interrupt
4728  *
4729  * (Taken from FreeBSD tree)
4730  * (yes this is all very magic and confusing :)
4731  *
4732  * @dev port handle
4733  * @entry the register array entry
4734  * @vector the MSIX vector for this queue
4735  * @type RX/TX/MISC
4736  */
4737 static void
4738 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4739 {
4740         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4741         u32 ivar, index;
4742
4743         vector |= IXGBE_IVAR_ALLOC_VAL;
4744
4745         switch (hw->mac.type) {
4746
4747         case ixgbe_mac_82598EB:
4748                 if (type == -1)
4749                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4750                 else
4751                         entry += (type * 64);
4752                 index = (entry >> 2) & 0x1F;
4753                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4754                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4755                 ivar |= (vector << (8 * (entry & 0x3)));
4756                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4757                 break;
4758
4759         case ixgbe_mac_82599EB:
4760         case ixgbe_mac_X540:
4761                 if (type == -1) { /* MISC IVAR */
4762                         index = (entry & 1) * 8;
4763                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4764                         ivar &= ~(0xFF << index);
4765                         ivar |= (vector << index);
4766                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4767                 } else {        /* RX/TX IVARS */
4768                         index = (16 * (entry & 1)) + (8 * type);
4769                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4770                         ivar &= ~(0xFF << index);
4771                         ivar |= (vector << index);
4772                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4773                 }
4774
4775                 break;
4776
4777         default:
4778                 break;
4779         }
4780 }
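
/*
 * Example (illustrative): on 82599/X540, RX queue 5 (entry = 5, type = 0)
 * uses IVAR(5 >> 1) = IVAR(2) at bit offset 16 * (5 & 1) + 8 * 0 = 16,
 * i.e. the vector is written to bits 23:16 of IVAR(2).
 */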
4781
4782 void __rte_cold
4783 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4784 {
4785         uint16_t i, rx_using_sse;
4786         struct ixgbe_adapter *adapter = dev->data->dev_private;
4787
4788         /*
4789          * In order to allow Vector Rx there are a few configuration
4790          * conditions to be met and Rx Bulk Allocation should be allowed.
4791          */
4792         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4793             !adapter->rx_bulk_alloc_allowed ||
4794                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4795                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4796                                     "preconditions",
4797                              dev->data->port_id);
4798
4799                 adapter->rx_vec_allowed = false;
4800         }
4801
4802         /*
4803          * Initialize the appropriate LRO callback.
4804          *
4805          * If all queues satisfy the bulk allocation preconditions
4806          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4807          * Otherwise use a single allocation version.
4808          */
4809         if (dev->data->lro) {
4810                 if (adapter->rx_bulk_alloc_allowed) {
4811                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4812                                            "allocation version");
4813                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4814                 } else {
4815                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4816                                            "allocation version");
4817                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4818                 }
4819         } else if (dev->data->scattered_rx) {
4820                 /*
4821                  * Set the non-LRO scattered callback: there are Vector and
4822                  * single allocation versions.
4823                  */
4824                 if (adapter->rx_vec_allowed) {
4825                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4826                                             "callback (port=%d).",
4827                                      dev->data->port_id);
4828
4829                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4830                 } else if (adapter->rx_bulk_alloc_allowed) {
4831                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4832                                            "allocation callback (port=%d).",
4833                                      dev->data->port_id);
4834                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4835                 } else {
4836                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4837                                             "single allocation) "
4838                                             "Scattered Rx callback "
4839                                             "(port=%d).",
4840                                      dev->data->port_id);
4841
4842                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4843                 }
4844         /*
4845          * Below we set "simple" callbacks according to port/queues parameters.
4846          * If parameters allow we are going to choose between the following
4847          * callbacks:
4848          *    - Vector
4849          *    - Bulk Allocation
4850          *    - Single buffer allocation (the simplest one)
4851          */
4852         } else if (adapter->rx_vec_allowed) {
4853                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4854                                     "burst size is no less than %d (port=%d).",
4855                              RTE_IXGBE_DESCS_PER_LOOP,
4856                              dev->data->port_id);
4857
4858                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4859         } else if (adapter->rx_bulk_alloc_allowed) {
4860                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4861                                     "satisfied. Rx Burst Bulk Alloc function "
4862                                     "will be used on port=%d.",
4863                              dev->data->port_id);
4864
4865                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4866         } else {
4867                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4868                                     "satisfied, or Scattered Rx is requested "
4869                                     "(port=%d).",
4870                              dev->data->port_id);
4871
4872                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4873         }
4874
4875         /* Propagate information about RX function choice through all queues. */
4876
4877         rx_using_sse =
4878                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4879                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4880
4881         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4882                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4883
4884                 rxq->rx_using_sse = rx_using_sse;
4885 #ifdef RTE_LIB_SECURITY
4886                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4887                                 RTE_ETH_RX_OFFLOAD_SECURITY);
4888 #endif
4889         }
4890 }
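
/*
 * Selection order implemented above: LRO callbacks first (bulk or single
 * allocation), then the scattered callbacks (vector, bulk allocation or
 * single allocation), and for the simple case vector RX, then bulk
 * allocation, then the plain ixgbe_recv_pkts() fallback.
 */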
4891
4892 /**
4893  * ixgbe_set_rsc - configure RSC related port HW registers
4894  *
4895  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4896  * of 82599 Spec (x540 configuration is virtually the same).
4897  *
4898  * @dev port handle
4899  *
4900  * Returns 0 in case of success or a non-zero error code
4901  */
4902 static int
4903 ixgbe_set_rsc(struct rte_eth_dev *dev)
4904 {
4905         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4906         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4907         struct rte_eth_dev_info dev_info = { 0 };
4908         bool rsc_capable = false;
4909         uint16_t i;
4910         uint32_t rdrxctl;
4911         uint32_t rfctl;
4912
4913         /* Sanity check */
4914         dev->dev_ops->dev_infos_get(dev, &dev_info);
4915         if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
4916                 rsc_capable = true;
4917
4918         if (!rsc_capable && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4919                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4920                                    "support it");
4921                 return -EINVAL;
4922         }
4923
4924         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4925
4926         if ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) &&
4927              (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4928                 /*
4929                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4930                  * 3.0 RSC configuration requires HW CRC stripping being
4931                  * enabled. If user requested both HW CRC stripping off
4932                  * and RSC on - return an error.
4933                  */
4934                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4935                                     "is disabled");
4936                 return -EINVAL;
4937         }
4938
4939         /* RFCTL configuration  */
4940         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4941         if ((rsc_capable) && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4942                 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4943         else
4944                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4945         /* disable NFS filtering */
4946         rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4947         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4948
4949         /* If LRO hasn't been requested - we are done here. */
4950         if (!(rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4951                 return 0;
4952
4953         /* Set RDRXCTL.RSCACKC bit */
4954         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4955         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4956         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4957
4958         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4959         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4960                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4961                 uint32_t srrctl =
4962                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4963                 uint32_t rscctl =
4964                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4965                 uint32_t psrtype =
4966                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4967                 uint32_t eitr =
4968                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4969
4970                 /*
4971                  * ixgbe PMD doesn't support header-split at the moment.
4972                  *
4973                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4974                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4975                  * should be configured even if header split is not
4976                  * enabled. We will configure it to 128 bytes, following the
4977                  * recommendation in the spec.
4978                  */
4979                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4980                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4981                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4982
4983                 /*
4984                  * TODO: Consider setting the Receive Descriptor Minimum
4985                  * Threshold Size for an RSC case. This is not an obviously
4986                  * beneficiary option but the one worth considering...
4987                  * beneficial option, but it may be worth considering...
4988
4989                 rscctl |= IXGBE_RSCCTL_RSCEN;
4990                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4991                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4992
4993                 /*
4994                  * RSC: Set ITR interval corresponding to 2K ints/s.
4995                  *
4996                  * Full-sized RSC aggregations for a 10Gb/s link will
4997                  * arrive at about 20K aggregation/s rate.
4998                  *
4999                  * A 2K ints/s rate will cause only 10% of the
5000                  * aggregations to be closed due to the interrupt timer
5001                  * expiration for a streaming at wire-speed case.
5002                  *
5003                  * For a sparse streaming case this setting will yield
5004                  * at most 500us latency for a single RSC aggregation.
5005                  */
5006                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5007                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5008                 eitr |= IXGBE_EITR_CNT_WDIS;
5009
5010                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5011                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5012                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5013                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5014
5015                 /*
5016                  * RSC requires the mapping of the queue to the
5017                  * interrupt vector.
5018                  */
5019                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5020         }
5021
5022         dev->data->lro = 1;
5023
5024         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5025
5026         return 0;
5027 }
5028
5029 /*
5030  * Initializes Receive Unit.
5031  */
5032 int __rte_cold
5033 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5034 {
5035         struct ixgbe_hw     *hw;
5036         struct ixgbe_rx_queue *rxq;
5037         uint64_t bus_addr;
5038         uint32_t rxctrl;
5039         uint32_t fctrl;
5040         uint32_t hlreg0;
5041         uint32_t maxfrs;
5042         uint32_t srrctl;
5043         uint32_t rdrxctl;
5044         uint32_t rxcsum;
5045         uint16_t buf_size;
5046         uint16_t i;
5047         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5048         uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5049         int rc;
5050
5051         PMD_INIT_FUNC_TRACE();
5052         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5053
5054         /*
5055          * Make sure receives are disabled while setting
5056          * up the RX context (registers, descriptor rings, etc.).
5057          */
5058         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5059         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5060
5061         /* Enable receipt of broadcast frames */
5062         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5063         fctrl |= IXGBE_FCTRL_BAM;
5064         fctrl |= IXGBE_FCTRL_DPF;
5065         fctrl |= IXGBE_FCTRL_PMCF;
5066         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5067
5068         /*
5069          * Configure CRC stripping, if any.
5070          */
5071         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5072         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5073                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5074         else
5075                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5076
5077         /*
5078          * Configure jumbo frame support, if any.
5079          */
5080         if (dev->data->mtu > RTE_ETHER_MTU) {
5081                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5082                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5083                 maxfrs &= 0x0000FFFF;
5084                 maxfrs |= (frame_size << 16);
5085                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5086         } else
5087                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5088
5089         /*
5090          * If loopback mode is configured, set LPBK bit.
5091          */
5092         if (dev->data->dev_conf.lpbk_mode != 0) {
5093                 rc = ixgbe_check_supported_loopback_mode(dev);
5094                 if (rc < 0) {
5095                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5096                         return rc;
5097                 }
5098                 hlreg0 |= IXGBE_HLREG0_LPBK;
5099         } else {
5100                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5101         }
5102
5103         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5104
5105         /*
5106          * Assume no header split and no VLAN strip support
5107          * on any Rx queue first.
5108          */
5109         rx_conf->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5110         /* Setup RX queues */
5111         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5112                 rxq = dev->data->rx_queues[i];
5113
5114                 /*
5115                  * Reset crc_len in case it was changed after queue setup by a
5116                  * call to configure.
5117                  */
5118                 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5119                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5120                 else
5121                         rxq->crc_len = 0;
5122
5123                 /* Setup the Base and Length of the Rx Descriptor Rings */
5124                 bus_addr = rxq->rx_ring_phys_addr;
5125                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5126                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5127                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5128                                 (uint32_t)(bus_addr >> 32));
5129                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5130                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
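                /*
                 * Worked example (illustrative, based on an assumed ring size):
                 * with 512 descriptors and sizeof(union ixgbe_adv_rx_desc) == 16,
                 * RDLEN is programmed to 512 * 16 = 8192 bytes.
                 */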
5131                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5132                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5133
5134                 /* Configure the SRRCTL register */
5135                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5136
5137                 /* Set whether packets are dropped when no Rx descriptors are available */
5138                 if (rxq->drop_en)
5139                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5140
5141                 /*
5142                  * Configure the RX buffer size in the BSIZEPACKET field of
5143                  * the SRRCTL register of the queue.
5144                  * The value is in 1 KB resolution. Valid values can be from
5145                  * 1 KB to 16 KB.
5146                  */
5147                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5148                         RTE_PKTMBUF_HEADROOM);
5149                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5150                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5151
5152                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5153
5154                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5155                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
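                /*
                 * Worked example (illustrative, assuming the common defaults):
                 * with RTE_MBUF_DEFAULT_BUF_SIZE (2176 bytes) mbufs and
                 * RTE_PKTMBUF_HEADROOM == 128, the usable room is 2048 bytes,
                 * so BSIZEPACKET = 2048 >> 10 = 2 and the effective Rx buffer
                 * size read back here is 2 << 10 = 2048 bytes.
                 */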
5156
5157                 /* Account for two VLAN tags (QinQ) when checking the buffer size */
5158                 if (frame_size + 2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5159                         dev->data->scattered_rx = 1;
5160                 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5161                         rx_conf->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5162         }
5163
5164         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
5165                 dev->data->scattered_rx = 1;
5166
5167         /*
5168          * Configure multi-queue Rx operation according to the requested mq_mode.
5169          */
5170         ixgbe_dev_mq_rx_configure(dev);
5171
5172         /*
5173          * Set up the Checksum Register.
5174          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5175          * Enable IP/L4 checksum computation by hardware if requested to do so.
5176          */
5177         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5178         rxcsum |= IXGBE_RXCSUM_PCSD;
5179         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
5180                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5181         else
5182                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5183
5184         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5185
5186         if (hw->mac.type == ixgbe_mac_82599EB ||
5187             hw->mac.type == ixgbe_mac_X540) {
5188                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5189                 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5190                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5191                 else
5192                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5193                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5194                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5195         }
5196
5197         rc = ixgbe_set_rsc(dev);
5198         if (rc)
5199                 return rc;
5200
5201         ixgbe_set_rx_function(dev);
5202
5203         return 0;
5204 }
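
/*
 * Illustrative sketch (not part of the driver): a minimal application-side
 * port configuration that exercises the Rx/Tx init paths in this file when
 * rte_eth_dev_start() is called. The port_id, queue/descriptor counts and
 * the mb_pool mempool are assumptions made for the example only.
 */
#if 0
static int
example_port_init(uint16_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_conf port_conf = {
                .rxmode = {
                        .mq_mode = RTE_ETH_MQ_RX_NONE,
                        /* Ask for hardware IP/L4 checksum validation (RXCSUM above). */
                        .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM,
                },
        };
        int ret;

        /* One Rx queue and one Tx queue. */
        ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
        if (ret != 0)
                return ret;

        ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
                                     NULL, mb_pool);
        if (ret != 0)
                return ret;

        ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), NULL);
        if (ret != 0)
                return ret;

        /* For this PMD, starting the port reaches ixgbe_dev_rx_init()/tx_init(). */
        return rte_eth_dev_start(port_id);
}
#endif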
5205
5206 /*
5207  * Initializes Transmit Unit.
5208  */
5209 void __rte_cold
5210 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5211 {
5212         struct ixgbe_hw     *hw;
5213         struct ixgbe_tx_queue *txq;
5214         uint64_t bus_addr;
5215         uint32_t hlreg0;
5216         uint32_t txctrl;
5217         uint16_t i;
5218
5219         PMD_INIT_FUNC_TRACE();
5220         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5221
5222         /* Enable TX CRC (checksum offload requirement) and hw padding
5223          * (TSO requirement)
5224          */
5225         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5226         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5227         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5228
5229         /* Setup the Base and Length of the Tx Descriptor Rings */
5230         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5231                 txq = dev->data->tx_queues[i];
5232
5233                 bus_addr = txq->tx_ring_phys_addr;
5234                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5235                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5236                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5237                                 (uint32_t)(bus_addr >> 32));
5238                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5239                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5240                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5241                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5242                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5243
5244                 /*
5245          * Disable the Tx head write-back relaxed-ordering (RO) bit:
5246          * out-of-order write-back breaks the driver's descriptor bookkeeping.
5247                  */
5248                 switch (hw->mac.type) {
5249                 case ixgbe_mac_82598EB:
5250                         txctrl = IXGBE_READ_REG(hw,
5251                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5252                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5253                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5254                                         txctrl);
5255                         break;
5256
5257                 case ixgbe_mac_82599EB:
5258                 case ixgbe_mac_X540:
5259                 case ixgbe_mac_X550:
5260                 case ixgbe_mac_X550EM_x:
5261                 case ixgbe_mac_X550EM_a:
5262                 default:
5263                         txctrl = IXGBE_READ_REG(hw,
5264                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5265                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5266                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5267                                         txctrl);
5268                         break;
5269                 }
5270         }
5271
5272         /* Configure multi-queue Tx operation according to the requested mq_mode. */
5273         ixgbe_dev_mq_tx_configure(dev);
5274 }
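
/*
 * Illustrative sketch (not part of the driver): how an application would
 * request specific Tx threshold values at queue setup time; the values end
 * up in txq->pthresh/hthresh/wthresh and are programmed into TXDCTL by
 * ixgbe_dev_rxtx_start() below. The threshold numbers are arbitrary examples,
 * not recommendations, and port_id is an assumption.
 */
#if 0
static int
example_tx_queue_setup(uint16_t port_id)
{
        struct rte_eth_txconf txconf = {
                .tx_thresh = {
                        .pthresh = 32,  /* prefetch threshold */
                        .hthresh = 0,   /* host threshold */
                        .wthresh = 0,   /* write-back threshold */
                },
                .tx_free_thresh = 32,
                .tx_rs_thresh = 32,
        };

        return rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
}
#endif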
5275
5276 /*
5277  * Check if requested loopback mode is supported
5278  */
5279 int
5280 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5281 {
5282         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5283
5284         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5285                 if (hw->mac.type == ixgbe_mac_82599EB ||
5286                      hw->mac.type == ixgbe_mac_X540 ||
5287                      hw->mac.type == ixgbe_mac_X550 ||
5288                      hw->mac.type == ixgbe_mac_X550EM_x ||
5289                      hw->mac.type == ixgbe_mac_X550EM_a)
5290                         return 0;
5291
5292         return -ENOTSUP;
5293 }
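
/*
 * Illustrative sketch (not part of the driver): requesting Tx->Rx loopback
 * from an application before the port is started. The lpbk_mode value 1 is
 * assumed to correspond to IXGBE_LPBK_TX_RX; ixgbe_dev_rx_init() validates
 * it at start time via ixgbe_check_supported_loopback_mode() above.
 */
#if 0
static int
example_enable_loopback(uint16_t port_id)
{
        struct rte_eth_conf port_conf = {
                .lpbk_mode = 1, /* assumed IXGBE_LPBK_TX_RX: Tx->Rx loopback */
        };

        return rte_eth_dev_configure(port_id, 1, 1, &port_conf);
}
#endif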
5294
5295 /*
5296  * Set up link for 82599 loopback mode Tx->Rx.
5297  */
5298 static inline void __rte_cold
5299 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5300 {
5301         PMD_INIT_FUNC_TRACE();
5302
5303         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5304                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5305                                 IXGBE_SUCCESS) {
5306                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5307                         /* ignore error */
5308                         return;
5309                 }
5310         }
5311
5312         /* Restart link */
5313         IXGBE_WRITE_REG(hw,
5314                         IXGBE_AUTOC,
5315                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5316         ixgbe_reset_pipeline_82599(hw);
5317
5318         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5319         msec_delay(50);
5320 }
5321
5322
5323 /*
5324  * Start Transmit and Receive Units.
5325  */
5326 int __rte_cold
5327 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5328 {
5329         struct ixgbe_hw     *hw;
5330         struct ixgbe_tx_queue *txq;
5331         struct ixgbe_rx_queue *rxq;
5332         uint32_t txdctl;
5333         uint32_t dmatxctl;
5334         uint32_t rxctrl;
5335         uint16_t i;
5336         int ret = 0;
5337
5338         PMD_INIT_FUNC_TRACE();
5339         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5340
5341         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5342                 txq = dev->data->tx_queues[i];
5343                 /* Setup Transmit Threshold Registers */
5344                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5345                 txdctl |= txq->pthresh & 0x7F;
5346                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5347                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5348                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5349         }
5350
5351         if (hw->mac.type != ixgbe_mac_82598EB) {
5352                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5353                 dmatxctl |= IXGBE_DMATXCTL_TE;
5354                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5355         }
5356
5357         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5358                 txq = dev->data->tx_queues[i];
5359                 if (!txq->tx_deferred_start) {
5360                         ret = ixgbe_dev_tx_queue_start(dev, i);
5361                         if (ret < 0)
5362                                 return ret;
5363                 }
5364         }
5365
5366         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5367                 rxq = dev->data->rx_queues[i];
5368                 if (!rxq->rx_deferred_start) {
5369                         ret = ixgbe_dev_rx_queue_start(dev, i);
5370                         if (ret < 0)
5371                                 return ret;
5372                 }
5373         }
5374
5375         /* Enable Receive engine */
5376         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5377         if (hw->mac.type == ixgbe_mac_82598EB)
5378                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5379         rxctrl |= IXGBE_RXCTRL_RXEN;
5380         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5381
5382         /* If loopback mode is enabled, set up the link accordingly */
5383         if (dev->data->dev_conf.lpbk_mode != 0) {
5384                 if (hw->mac.type == ixgbe_mac_82599EB)
5385                         ixgbe_setup_loopback_link_82599(hw);
5386                 else if (hw->mac.type == ixgbe_mac_X540 ||
5387                      hw->mac.type == ixgbe_mac_X550 ||
5388                      hw->mac.type == ixgbe_mac_X550EM_x ||
5389                      hw->mac.type == ixgbe_mac_X550EM_a)
5390                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5391         }
5392
5393 #ifdef RTE_LIB_SECURITY
5394         if ((dev->data->dev_conf.rxmode.offloads &
5395                         RTE_ETH_RX_OFFLOAD_SECURITY) ||
5396                 (dev->data->dev_conf.txmode.offloads &
5397                         RTE_ETH_TX_OFFLOAD_SECURITY)) {
5398                 ret = ixgbe_crypto_enable_ipsec(dev);
5399                 if (ret != 0) {
5400                         PMD_DRV_LOG(ERR,
5401                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5402                                     ret);
5403                         return ret;
5404                 }
5405         }
5406 #endif
5407
5408         return 0;
5409 }
5410
5411 /*
5412  * Start Receive Units for specified queue.
5413  */
5414 int __rte_cold
5415 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5416 {
5417         struct ixgbe_hw     *hw;
5418         struct ixgbe_rx_queue *rxq;
5419         uint32_t rxdctl;
5420         int poll_ms;
5421
5422         PMD_INIT_FUNC_TRACE();
5423         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5424
5425         rxq = dev->data->rx_queues[rx_queue_id];
5426
5427         /* Allocate buffers for descriptor rings */
5428         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5429                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue %d",
5430                              rx_queue_id);
5431                 return -1;
5432         }
5433         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5434         rxdctl |= IXGBE_RXDCTL_ENABLE;
5435         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5436
5437         /* Wait until RX Enable ready */
5438         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5439         do {
5440                 rte_delay_ms(1);
5441                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5442         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5443         if (!poll_ms)
5444                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5445         rte_wmb();
5446         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5447         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5448         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5449
5450         return 0;
5451 }
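
/*
 * Illustrative sketch (not part of the driver): deferring a queue start so
 * that ixgbe_dev_rxtx_start() skips it, then starting it explicitly, which
 * calls ixgbe_dev_rx_queue_start() above. rte_eth_dev_configure() is assumed
 * to have been called already; port/queue ids and the mempool are example
 * assumptions.
 */
#if 0
static int
example_deferred_rx_queue(uint16_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_rxconf rxconf = {
                .rx_deferred_start = 1, /* skipped by ixgbe_dev_rxtx_start() */
        };
        int ret;

        ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
                                     &rxconf, mb_pool);
        if (ret != 0)
                return ret;

        ret = rte_eth_dev_start(port_id);
        if (ret != 0)
                return ret;

        return rte_eth_dev_rx_queue_start(port_id, 0);
}
#endif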
5452
5453 /*
5454  * Stop Receive Units for specified queue.
5455  */
5456 int __rte_cold
5457 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5458 {
5459         struct ixgbe_hw     *hw;
5460         struct ixgbe_adapter *adapter = dev->data->dev_private;
5461         struct ixgbe_rx_queue *rxq;
5462         uint32_t rxdctl;
5463         int poll_ms;
5464
5465         PMD_INIT_FUNC_TRACE();
5466         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5467
5468         rxq = dev->data->rx_queues[rx_queue_id];
5469
5470         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5471         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5472         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5473
5474         /* Wait until RX Enable bit clear */
5475         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5476         do {
5477                 rte_delay_ms(1);
5478                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5479         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5480         if (!poll_ms)
5481                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5482
5483         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5484
5485         ixgbe_rx_queue_release_mbufs(rxq);
5486         ixgbe_reset_rx_queue(adapter, rxq);
5487         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5488
5489         return 0;
5490 }
5491
5492
5493 /*
5494  * Start Transmit Units for specified queue.
5495  */
5496 int __rte_cold
5497 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5498 {
5499         struct ixgbe_hw     *hw;
5500         struct ixgbe_tx_queue *txq;
5501         uint32_t txdctl;
5502         int poll_ms;
5503
5504         PMD_INIT_FUNC_TRACE();
5505         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5506
5507         txq = dev->data->tx_queues[tx_queue_id];
5508         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5509         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5510         txdctl |= IXGBE_TXDCTL_ENABLE;
5511         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5512
5513         /* Wait until TX Enable ready */
5514         if (hw->mac.type == ixgbe_mac_82599EB) {
5515                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5516                 do {
5517                         rte_delay_ms(1);
5518                         txdctl = IXGBE_READ_REG(hw,
5519                                 IXGBE_TXDCTL(txq->reg_idx));
5520                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5521                 if (!poll_ms)
5522                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5523                                 tx_queue_id);
5524         }
5525         rte_wmb();
5526         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5527         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5528
5529         return 0;
5530 }
5531
5532 /*
5533  * Stop Transmit Units for specified queue.
5534  */
5535 int __rte_cold
5536 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5537 {
5538         struct ixgbe_hw     *hw;
5539         struct ixgbe_tx_queue *txq;
5540         uint32_t txdctl;
5541         uint32_t txtdh, txtdt;
5542         int poll_ms;
5543
5544         PMD_INIT_FUNC_TRACE();
5545         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5546
5547         txq = dev->data->tx_queues[tx_queue_id];
5548
5549         /* Wait until TX queue is empty */
5550         if (hw->mac.type == ixgbe_mac_82599EB) {
5551                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5552                 do {
5553                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5554                         txtdh = IXGBE_READ_REG(hw,
5555                                                IXGBE_TDH(txq->reg_idx));
5556                         txtdt = IXGBE_READ_REG(hw,
5557                                                IXGBE_TDT(txq->reg_idx));
5558                 } while (--poll_ms && (txtdh != txtdt));
5559                 if (!poll_ms)
5560                         PMD_INIT_LOG(ERR,
5561                                 "Tx Queue %d is not empty when stopping.",
5562                                 tx_queue_id);
5563         }
5564
5565         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5566         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5567         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5568
5569         /* Wait until TX Enable bit clear */
5570         if (hw->mac.type == ixgbe_mac_82599EB) {
5571                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5572                 do {
5573                         rte_delay_ms(1);
5574                         txdctl = IXGBE_READ_REG(hw,
5575                                                 IXGBE_TXDCTL(txq->reg_idx));
5576                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5577                 if (!poll_ms)
5578                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5579                                 tx_queue_id);
5580         }
5581
5582         if (txq->ops != NULL) {
5583                 txq->ops->release_mbufs(txq);
5584                 txq->ops->reset(txq);
5585         }
5586         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5587
5588         return 0;
5589 }
5590
5591 void
5592 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5593         struct rte_eth_rxq_info *qinfo)
5594 {
5595         struct ixgbe_rx_queue *rxq;
5596
5597         rxq = dev->data->rx_queues[queue_id];
5598
5599         qinfo->mp = rxq->mb_pool;
5600         qinfo->scattered_rx = dev->data->scattered_rx;
5601         qinfo->nb_desc = rxq->nb_rx_desc;
5602
5603         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5604         qinfo->conf.rx_drop_en = rxq->drop_en;
5605         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5606         qinfo->conf.offloads = rxq->offloads;
5607 }
5608
5609 void
5610 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5611         struct rte_eth_txq_info *qinfo)
5612 {
5613         struct ixgbe_tx_queue *txq;
5614
5615         txq = dev->data->tx_queues[queue_id];
5616
5617         qinfo->nb_desc = txq->nb_tx_desc;
5618
5619         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5620         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5621         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5622
5623         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5624         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5625         qinfo->conf.offloads = txq->offloads;
5626         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5627 }
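
/*
 * Illustrative sketch (not part of the driver): reading back the values that
 * ixgbe_rxq_info_get()/ixgbe_txq_info_get() fill in, through the generic
 * ethdev API. port_id and the use of queue 0 are assumptions for the example.
 */
#if 0
static void
example_dump_queue_info(uint16_t port_id)
{
        struct rte_eth_rxq_info rx_info;
        struct rte_eth_txq_info tx_info;

        if (rte_eth_rx_queue_info_get(port_id, 0, &rx_info) == 0)
                printf("rxq0: %u descriptors, offloads 0x%llx\n",
                       (unsigned int)rx_info.nb_desc,
                       (unsigned long long)rx_info.conf.offloads);

        if (rte_eth_tx_queue_info_get(port_id, 0, &tx_info) == 0)
                printf("txq0: %u descriptors, tx_rs_thresh %u\n",
                       (unsigned int)tx_info.nb_desc,
                       (unsigned int)tx_info.conf.tx_rs_thresh);
}
#endif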
5628
5629 /*
5630  * [VF] Initializes Receive Unit.
5631  */
5632 int __rte_cold
5633 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5634 {
5635         struct ixgbe_hw     *hw;
5636         struct ixgbe_rx_queue *rxq;
5637         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5638         uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5639         uint64_t bus_addr;
5640         uint32_t srrctl, psrtype = 0;
5641         uint16_t buf_size;
5642         uint16_t i;
5643         int ret;
5644
5645         PMD_INIT_FUNC_TRACE();
5646         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5647
5648         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5649                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5650                         "it must be a power of 2");
5651                 return -1;
5652         }
5653
5654         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5655                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5656                         "it must be less than or equal to %d",
5657                         hw->mac.max_rx_queues);
5658                 return -1;
5659         }
5660
5661         /*
5662          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5663          * disables VF packet reception if the PF MTU is > 1500.
5664          * This is done to deal with an 82599 limitation that requires
5665          * the PF and all VFs to share the same MTU.
5666          * The PF driver re-enables VF packet reception only when the VF
5667          * driver issues an IXGBE_VF_SET_LPE request.
5668          * In the meantime, the VF device cannot be used, even if the VF driver
5669          * and the Guest VM network stack are ready to accept packets with a
5670          * size up to the PF MTU.
5671          * As a work-around to this PF behaviour, force the call to
5672          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5673          * VF packet reception works in all cases.
5674          */
5675         if (ixgbevf_rlpml_set_vf(hw, frame_size) != 0)
5676                 PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
5677                              frame_size);
5678
5679         /*
5680          * Start by assuming no header split and no VLAN strip support
5681          * on any Rx queue.
5682          */
5683         rxmode->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5684         /* Setup RX queues */
5685         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5686                 rxq = dev->data->rx_queues[i];
5687
5688                 /* Allocate buffers for descriptor rings */
5689                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5690                 if (ret)
5691                         return ret;
5692
5693                 /* Setup the Base and Length of the Rx Descriptor Rings */
5694                 bus_addr = rxq->rx_ring_phys_addr;
5695
5696                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5697                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5698                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5699                                 (uint32_t)(bus_addr >> 32));
5700                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5701                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5702                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5703                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5704
5705
5706                 /* Configure the SRRCTL register */
5707                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5708
5709                 /* Set whether packets are dropped when no Rx descriptors are available */
5710                 if (rxq->drop_en)
5711                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5712
5713                 /*
5714                  * Configure the RX buffer size in the BSIZEPACKET field of
5715                  * the SRRCTL register of the queue.
5716                  * The value is in 1 KB resolution. Valid values can be from
5717                  * 1 KB to 16 KB.
5718                  */
5719                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5720                         RTE_PKTMBUF_HEADROOM);
5721                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5722                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5723
5724                 /*
5725                  * Write the VF-specific SRRCTL register.
5726                  */
5727                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5728
5729                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5730                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5731
5732                 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ||
5733                     /* Account for two VLAN tags (QinQ) when checking the buffer size */
5734                     (frame_size + 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5735                         if (!dev->data->scattered_rx)
5736                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5737                         dev->data->scattered_rx = 1;
5738                 }
5739
5740                 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5741                         rxmode->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5742         }
5743
5744         /* Set RQPL (RSS queues per pool) for VF RSS according to the number of Rx queues */
5745         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5746                 IXGBE_PSRTYPE_RQPL_SHIFT;
5747         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5748
5749         ixgbe_set_rx_function(dev);
5750
5751         return 0;
5752 }
5753
5754 /*
5755  * [VF] Initializes Transmit Unit.
5756  */
5757 void __rte_cold
5758 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5759 {
5760         struct ixgbe_hw     *hw;
5761         struct ixgbe_tx_queue *txq;
5762         uint64_t bus_addr;
5763         uint32_t txctrl;
5764         uint16_t i;
5765
5766         PMD_INIT_FUNC_TRACE();
5767         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5768
5769         /* Setup the Base and Length of the Tx Descriptor Rings */
5770         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5771                 txq = dev->data->tx_queues[i];
5772                 bus_addr = txq->tx_ring_phys_addr;
5773                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5774                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5775                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5776                                 (uint32_t)(bus_addr >> 32));
5777                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5778                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5779                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5780                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5781                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5782
5783                 /*
5784          * Disable the Tx head write-back relaxed-ordering (RO) bit:
5785          * out-of-order write-back breaks the driver's descriptor bookkeeping.
5786                  */
5787                 txctrl = IXGBE_READ_REG(hw,
5788                                 IXGBE_VFDCA_TXCTRL(i));
5789                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5790                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5791                                 txctrl);
5792         }
5793 }
5794
5795 /*
5796  * [VF] Start Transmit and Receive Units.
5797  */
5798 void __rte_cold
5799 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5800 {
5801         struct ixgbe_hw     *hw;
5802         struct ixgbe_tx_queue *txq;
5803         struct ixgbe_rx_queue *rxq;
5804         uint32_t txdctl;
5805         uint32_t rxdctl;
5806         uint16_t i;
5807         int poll_ms;
5808
5809         PMD_INIT_FUNC_TRACE();
5810         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5811
5812         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5813                 txq = dev->data->tx_queues[i];
5814                 /* Setup Transmit Threshold Registers */
5815                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5816                 txdctl |= txq->pthresh & 0x7F;
5817                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5818                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5819                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5820         }
5821
5822         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5823
5824                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5825                 txdctl |= IXGBE_TXDCTL_ENABLE;
5826                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5827
5828                 poll_ms = 10;
5829                 /* Wait until TX Enable ready */
5830                 do {
5831                         rte_delay_ms(1);
5832                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5833                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5834                 if (!poll_ms)
5835                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5836         }
5837         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5838
5839                 rxq = dev->data->rx_queues[i];
5840
5841                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5842                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5843                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5844
5845                 /* Wait until RX Enable ready */
5846                 poll_ms = 10;
5847                 do {
5848                         rte_delay_ms(1);
5849                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5850                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5851                 if (!poll_ms)
5852                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5853                 rte_wmb();
5854                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5855
5856         }
5857 }
5858
5859 int
5860 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5861                     const struct rte_flow_action_rss *in)
5862 {
5863         if (in->key_len > RTE_DIM(out->key) ||
5864             in->queue_num > RTE_DIM(out->queue))
5865                 return -EINVAL;
5866         out->conf = (struct rte_flow_action_rss){
5867                 .func = in->func,
5868                 .level = in->level,
5869                 .types = in->types,
5870                 .key_len = in->key_len,
5871                 .queue_num = in->queue_num,
5872                 .key = memcpy(out->key, in->key, in->key_len),
5873                 .queue = memcpy(out->queue, in->queue,
5874                                 sizeof(*in->queue) * in->queue_num),
5875         };
5876         return 0;
5877 }
5878
5879 int
5880 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5881                       const struct rte_flow_action_rss *with)
5882 {
5883         return (comp->func == with->func &&
5884                 comp->level == with->level &&
5885                 comp->types == with->types &&
5886                 comp->key_len == with->key_len &&
5887                 comp->queue_num == with->queue_num &&
5888                 !memcmp(comp->key, with->key, with->key_len) &&
5889                 !memcmp(comp->queue, with->queue,
5890                         sizeof(*with->queue) * with->queue_num));
5891 }
5892
5893 int
5894 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5895                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5896 {
5897         struct ixgbe_hw *hw;
5898         uint32_t reta;
5899         uint16_t i;
5900         uint16_t j;
5901         uint16_t sp_reta_size;
5902         uint32_t reta_reg;
5903         struct rte_eth_rss_conf rss_conf = {
5904                 .rss_key = conf->conf.key_len ?
5905                         (void *)(uintptr_t)conf->conf.key : NULL,
5906                 .rss_key_len = conf->conf.key_len,
5907                 .rss_hf = conf->conf.types,
5908         };
5909         struct ixgbe_filter_info *filter_info =
5910                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5911
5912         PMD_INIT_FUNC_TRACE();
5913         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5914
5915         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5916
5917         if (!add) {
5918                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5919                                           &conf->conf)) {
5920                         ixgbe_rss_disable(dev);
5921                         memset(&filter_info->rss_info, 0,
5922                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5923                         return 0;
5924                 }
5925                 return -EINVAL;
5926         }
5927
5928         if (filter_info->rss_info.conf.queue_num)
5929                 return -EINVAL;
5930         /* Fill in the redirection table.
5931          * Entries are accumulated MSB-first, so the byte-swap is needed
5932          * to match the little-endian layout of the NIC register.
5933          */
5934         reta = 0;
5935         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5936                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5937
5938                 if (j == conf->conf.queue_num)
5939                         j = 0;
5940                 reta = (reta << 8) | conf->conf.queue[j];
5941                 if ((i & 3) == 3)
5942                         IXGBE_WRITE_REG(hw, reta_reg,
5943                                         rte_bswap32(reta));
5944         }
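
        /*
         * Worked example (illustrative): for conf->conf.queue = {0, 1, 2, 3},
         * the first four iterations accumulate reta = 0x00010203; after
         * rte_bswap32() the register holds 0x03020100, so entry 0 maps to
         * queue 0 in the least significant byte, as the hardware expects.
         */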
5945
5946         /* Configure the RSS key and the RSS protocols used to compute
5947          * the RSS hash of input packets.
5948          */
5949         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5950                 ixgbe_rss_disable(dev);
5951                 return 0;
5952         }
5953         if (rss_conf.rss_key == NULL)
5954                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5955         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5956
5957         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5958                 return -EINVAL;
5959
5960         return 0;
5961 }
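
/*
 * Illustrative sketch (not part of the driver): the kind of rte_flow RSS
 * action that reaches ixgbe_config_rss_filter() through the flow API
 * (rte_flow.h assumed to be included by the application). The queue list,
 * hash types and flow attributes are assumptions; key_len == 0 makes the
 * driver fall back to its default hash key.
 */
#if 0
static struct rte_flow *
example_rss_flow(uint16_t port_id)
{
        static uint16_t queues[] = { 0, 1, 2, 3 };
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_rss rss = {
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
                .types = RTE_ETH_RSS_IP,
                .queue_num = RTE_DIM(queues),
                .queue = queues,
        };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif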
5962
5963 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5964 #if defined(RTE_ARCH_PPC_64)
5965 int
5966 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5967 {
5968         return -1;
5969 }
5970
5971 uint16_t
5972 ixgbe_recv_pkts_vec(
5973         void __rte_unused *rx_queue,
5974         struct rte_mbuf __rte_unused **rx_pkts,
5975         uint16_t __rte_unused nb_pkts)
5976 {
5977         return 0;
5978 }
5979
5980 uint16_t
5981 ixgbe_recv_scattered_pkts_vec(
5982         void __rte_unused *rx_queue,
5983         struct rte_mbuf __rte_unused **rx_pkts,
5984         uint16_t __rte_unused nb_pkts)
5985 {
5986         return 0;
5987 }
5988
5989 int
5990 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5991 {
5992         return -1;
5993 }
5994
5995 uint16_t
5996 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5997                 struct rte_mbuf __rte_unused **tx_pkts,
5998                 uint16_t __rte_unused nb_pkts)
5999 {
6000         return 0;
6001 }
6002
6003 int
6004 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
6005 {
6006         return -1;
6007 }
6008
6009 void
6010 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6011 {
6012         return;
6013 }
6014 #endif