dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c (commit 68736afbd6f7a995b702d4256c8c3cf3e61bc286)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask of the mbuf TX offload flags handled by this driver (used when building the TX context) */
62 #define IXGBE_TX_OFFLOAD_MASK (                  \
63                 PKT_TX_OUTER_IPV6 |              \
64                 PKT_TX_OUTER_IPV4 |              \
65                 PKT_TX_IPV6 |                    \
66                 PKT_TX_IPV4 |                    \
67                 PKT_TX_VLAN |            \
68                 PKT_TX_IP_CKSUM |                \
69                 PKT_TX_L4_MASK |                 \
70                 PKT_TX_TCP_SEG |                 \
71                 PKT_TX_MACSEC |                  \
72                 PKT_TX_OUTER_IP_CKSUM |          \
73                 PKT_TX_SEC_OFFLOAD |     \
74                 IXGBE_TX_IEEE1588_TMST)
75
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
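/*
 * PKT_TX_OFFLOAD_MASK covers every TX offload flag defined for mbufs, so
 * XOR-ing it with IXGBE_TX_OFFLOAD_MASK yields exactly the flags this driver
 * does not support. ixgbe_prep_pkts() uses the result to reject packets that
 * request an unsupported offload (rte_errno is set to ENOTSUP).
 */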
78
79 #if 1
80 #define RTE_PMD_USE_PREFETCH
81 #endif
82
83 #ifdef RTE_PMD_USE_PREFETCH
84 /*
85  * Prefetch a cache line into all cache levels.
86  */
87 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
88 #else
89 #define rte_ixgbe_prefetch(p)   do {} while (0)
90 #endif
91
92 /*********************************************************************
93  *
94  *  TX functions
95  *
96  **********************************************************************/
97
98 /*
99  * Check for descriptors with their DD bit set and free mbufs.
100  * Return the total number of buffers freed.
101  */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
104 {
105         struct ixgbe_tx_entry *txep;
106         uint32_t status;
107         int i, nb_free = 0;
108         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
109
110         /* check DD bit on threshold descriptor */
111         status = txq->tx_ring[txq->tx_next_dd].wb.status;
112         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
113                 return 0;
114
115         /*
116          * first buffer to free from S/W ring is at index
117          * tx_next_dd - (tx_rs_thresh-1)
118          */
119         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
120
121         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122                 /* free buffers one at a time */
123                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
124                 txep->mbuf = NULL;
125
126                 if (unlikely(m == NULL))
127                         continue;
128
129                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130                     (nb_free > 0 && m->pool != free[0]->pool)) {
131                         rte_mempool_put_bulk(free[0]->pool,
132                                              (void **)free, nb_free);
133                         nb_free = 0;
134                 }
135
136                 free[nb_free++] = m;
137         }
138
139         if (nb_free > 0)
140                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
141
142         /* buffers were freed, update counters */
143         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
145         if (txq->tx_next_dd >= txq->nb_tx_desc)
146                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
147
148         return txq->tx_rs_thresh;
149 }
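/*
 * Worked example of the bookkeeping above, assuming nb_tx_desc = 512 and
 * tx_rs_thresh = 32: tx_next_dd starts at 31, so once descriptor 31 reports
 * DD the 32 mbufs in sw_ring[0..31] are returned to their mempool in bulk,
 * nb_tx_free grows by 32 and tx_next_dd advances to 63. After the group
 * ending at descriptor 511 the index wraps back to 31.
 */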
150
151 /* Populate 4 descriptors with data from 4 mbufs */
152 static inline void
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
154 {
155         uint64_t buf_dma_addr;
156         uint32_t pkt_len;
157         int i;
158
159         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161                 pkt_len = (*pkts)->data_len;
162
163                 /* write data to descriptor */
164                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
165
166                 txdp->read.cmd_type_len =
167                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
168
169                 txdp->read.olinfo_status =
170                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
171
172                 rte_prefetch0(&(*pkts)->pool);
173         }
174 }
175
176 /* Populate 1 descriptor with data from 1 mbuf */
177 static inline void
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
179 {
180         uint64_t buf_dma_addr;
181         uint32_t pkt_len;
182
183         buf_dma_addr = rte_mbuf_data_iova(*pkts);
184         pkt_len = (*pkts)->data_len;
185
186         /* write data to descriptor */
187         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188         txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190         txdp->read.olinfo_status =
191                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192         rte_prefetch0(&(*pkts)->pool);
193 }
194
195 /*
196  * Fill H/W descriptor ring with mbuf data.
197  * Copy mbuf pointers to the S/W ring.
198  */
199 static inline void
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
201                       uint16_t nb_pkts)
202 {
203         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205         const int N_PER_LOOP = 4;
206         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207         int mainpart, leftover;
208         int i, j;
209
210         /*
211          * Process most of the packets in chunks of N pkts.  Any
212          * leftover packets will get processed one at a time.
213          */
214         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
216         for (i = 0; i < mainpart; i += N_PER_LOOP) {
217                 /* Copy N mbuf pointers to the S/W ring */
218                 for (j = 0; j < N_PER_LOOP; ++j) {
219                         (txep + i + j)->mbuf = *(pkts + i + j);
220                 }
221                 tx4(txdp + i, pkts + i);
222         }
223
224         if (unlikely(leftover > 0)) {
225                 for (i = 0; i < leftover; ++i) {
226                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227                         tx1(txdp + mainpart + i, pkts + mainpart + i);
228                 }
229         }
230 }
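/*
 * Example of the mainpart/leftover split above: for nb_pkts = 13 the loop
 * issues three tx4() calls for the first 12 packets (mainpart = 12) and the
 * remaining packet goes through tx1() (leftover = 1).
 */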
231
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
234              uint16_t nb_pkts)
235 {
236         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
238         uint16_t n = 0;
239
240         /*
241          * Begin scanning the H/W ring for done descriptors when the
242          * number of available descriptors drops below tx_free_thresh.  For
243          * each done descriptor, free the associated buffer.
244          */
245         if (txq->nb_tx_free < txq->tx_free_thresh)
246                 ixgbe_tx_free_bufs(txq);
247
248         /* Only use descriptors that are available */
249         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250         if (unlikely(nb_pkts == 0))
251                 return 0;
252
253         /* Use exactly nb_pkts descriptors */
254         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
255
256         /*
257          * At this point, we know there are enough descriptors in the
258          * ring to transmit all the packets.  This assumes that each
259          * mbuf contains a single segment, and that no new offloads
260          * are expected, which would require a new context descriptor.
261          */
262
263         /*
264          * See if we're going to wrap-around. If so, handle the top
265          * of the descriptor ring first, then do the bottom.  If not,
266          * the processing looks just like the "bottom" part anyway...
267          */
268         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
271
272                 /*
273                  * We know that the last descriptor in the ring will need to
274                  * have its RS bit set because tx_rs_thresh has to be
275                  * a divisor of the ring size
276                  */
277                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
280
281                 txq->tx_tail = 0;
282         }
283
284         /* Fill H/W descriptor ring with mbuf data */
285         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
287
288         /*
289          * Determine if RS bit should be set
290          * This is what we actually want:
291          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292          * but instead of subtracting 1 and doing >=, we can just do
293          * greater than without subtracting.
294          */
295         if (txq->tx_tail > txq->tx_next_rs) {
296                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
299                                                 txq->tx_rs_thresh);
300                 if (txq->tx_next_rs >= txq->nb_tx_desc)
301                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302         }
303
304         /*
305          * Check for wrap-around. This would only happen if we used
306          * up to the last descriptor in the ring, no more, no less.
307          */
308         if (txq->tx_tail >= txq->nb_tx_desc)
309                 txq->tx_tail = 0;
310
311         /* update tail pointer */
312         rte_wmb();
313         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
314
315         return nb_pkts;
316 }
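/*
 * Wrap-around example for the function above, assuming nb_tx_desc = 128,
 * tx_tail = 120 and nb_pkts = 16: the first fill writes n = 8 descriptors at
 * slots 120..127, sets RS on the descriptor at tx_next_rs (the last one in
 * the ring, since tx_rs_thresh divides the ring size) and resets tx_tail to
 * 0; the second fill writes the remaining 8 packets at slots 0..7, so
 * tx_tail is 8 when the tail register is updated.
 */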
317
318 uint16_t
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
320                        uint16_t nb_pkts)
321 {
322         uint16_t nb_tx;
323
324         /* Transmit the burst in a single call if it fits within TX_MAX_BURST */
325         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
327
328         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
329         nb_tx = 0;
330         while (nb_pkts) {
331                 uint16_t ret, n;
332
333                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335                 nb_tx = (uint16_t)(nb_tx + ret);
336                 nb_pkts = (uint16_t)(nb_pkts - ret);
337                 if (ret < n)
338                         break;
339         }
340
341         return nb_tx;
342 }
343
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
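/*
 * The burst is split into chunks of at most tx_rs_thresh packets because the
 * vector TX path manages descriptors (RS bit placement and mbuf recycling)
 * in tx_rs_thresh sized groups. A partial return (ret < num) means the ring
 * is full, and the untransmitted packets are handed back to the caller.
 */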
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN)
388                 tx_offload_mask.vlan_tci |= ~0;
389
390         /* check if TCP segmentation is required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
420                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
421                         tx_offload_mask.l2_len |= ~0;
422                         tx_offload_mask.l3_len |= ~0;
423                         break;
424                 case PKT_TX_TCP_CKSUM:
425                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
426                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
427                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
428                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
429                         tx_offload_mask.l2_len |= ~0;
430                         tx_offload_mask.l3_len |= ~0;
431                         break;
432                 case PKT_TX_SCTP_CKSUM:
433                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
434                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
435                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
436                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
437                         tx_offload_mask.l2_len |= ~0;
438                         tx_offload_mask.l3_len |= ~0;
439                         break;
440                 default:
441                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
442                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
443                         break;
444                 }
445         }
446
447         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
448                 tx_offload_mask.outer_l2_len |= ~0;
449                 tx_offload_mask.outer_l3_len |= ~0;
450                 tx_offload_mask.l2_len |= ~0;
451                 seqnum_seed |= tx_offload.outer_l3_len
452                                << IXGBE_ADVTXD_OUTER_IPLEN;
453                 seqnum_seed |= tx_offload.l2_len
454                                << IXGBE_ADVTXD_TUNNEL_LEN;
455         }
456 #ifdef RTE_LIB_SECURITY
457         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
458                 union ixgbe_crypto_tx_desc_md *md =
459                                 (union ixgbe_crypto_tx_desc_md *)mdata;
460                 seqnum_seed |=
461                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
462                 type_tucmd_mlhl |= md->enc ?
463                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
464                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
465                 type_tucmd_mlhl |=
466                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
467                 tx_offload_mask.sa_idx |= ~0;
468                 tx_offload_mask.sec_pad_len |= ~0;
469         }
470 #endif
471
472         txq->ctx_cache[ctx_idx].flags = ol_flags;
473         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
474                 tx_offload_mask.data[0] & tx_offload.data[0];
475         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
476                 tx_offload_mask.data[1] & tx_offload.data[1];
477         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
478
479         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
480         vlan_macip_lens = tx_offload.l3_len;
481         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
482                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
483                                     IXGBE_ADVTXD_MACLEN_SHIFT);
484         else
485                 vlan_macip_lens |= (tx_offload.l2_len <<
486                                     IXGBE_ADVTXD_MACLEN_SHIFT);
487         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
488         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
489         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
490         ctx_txd->seqnum_seed     = seqnum_seed;
491 }
492
493 /*
494  * Check which hardware context can be used. Use the existing match
495  * or create a new context descriptor.
496  */
497 static inline uint32_t
498 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
499                    union ixgbe_tx_offload tx_offload)
500 {
501         /* If it matches the currently used context */
502         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
503                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
504                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
505                      & tx_offload.data[0])) &&
506                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
507                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
508                      & tx_offload.data[1]))))
509                 return txq->ctx_curr;
510
511         /* Otherwise, check whether the other cached context matches */
512         txq->ctx_curr ^= 1;
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* Neither cached context matches: a new context descriptor must be built */
523         return IXGBE_CTX_NUM;
524 }
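/*
 * The queue keeps a cache of IXGBE_CTX_NUM recently programmed context
 * descriptors. The function above first compares the requested offloads
 * against the current slot, then toggles ctx_curr and tries the other one;
 * returning IXGBE_CTX_NUM tells the caller that neither cached context
 * matches and a new context descriptor has to be written.
 */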
525
526 static inline uint32_t
527 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
528 {
529         uint32_t tmp = 0;
530
531         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
532                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
533         if (ol_flags & PKT_TX_IP_CKSUM)
534                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
535         if (ol_flags & PKT_TX_TCP_SEG)
536                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
537         return tmp;
538 }
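/*
 * POPTS_IXSM requests IPv4 header checksum insertion and POPTS_TXSM requests
 * L4 checksum insertion. TSO requires a valid L4 checksum, which is why
 * PKT_TX_TCP_SEG also sets TXSM here.
 */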
539
540 static inline uint32_t
541 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
542 {
543         uint32_t cmdtype = 0;
544
545         if (ol_flags & PKT_TX_VLAN)
546                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
547         if (ol_flags & PKT_TX_TCP_SEG)
548                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
549         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
550                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
551         if (ol_flags & PKT_TX_MACSEC)
552                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
553         return cmdtype;
554 }
555
556 /* Default RS bit threshold values */
557 #ifndef DEFAULT_TX_RS_THRESH
558 #define DEFAULT_TX_RS_THRESH   32
559 #endif
560 #ifndef DEFAULT_TX_FREE_THRESH
561 #define DEFAULT_TX_FREE_THRESH 32
562 #endif
563
564 /* Reset transmit descriptors after they have been used */
565 static inline int
566 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
567 {
568         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
569         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
570         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
571         uint16_t nb_tx_desc = txq->nb_tx_desc;
572         uint16_t desc_to_clean_to;
573         uint16_t nb_tx_to_clean;
574         uint32_t status;
575
576         /* Determine the last descriptor needing to be cleaned */
577         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
578         if (desc_to_clean_to >= nb_tx_desc)
579                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
580
581         /* Check to make sure the last descriptor to clean is done */
582         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
583         status = txr[desc_to_clean_to].wb.status;
584         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
585                 PMD_TX_LOG(DEBUG,
586                            "TX descriptor %4u is not done "
587                            "(port=%d queue=%d)",
588                            desc_to_clean_to,
589                            txq->port_id, txq->queue_id);
590                 /* Failed to clean any descriptors, better luck next time */
591                 return -(1);
592         }
593
594         /* Figure out how many descriptors will be cleaned */
595         if (last_desc_cleaned > desc_to_clean_to)
596                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
597                                                         desc_to_clean_to);
598         else
599                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
600                                                 last_desc_cleaned);
601
602         PMD_TX_LOG(DEBUG,
603                    "Cleaning %4u TX descriptors: %4u to %4u "
604                    "(port=%d queue=%d)",
605                    nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
606                    txq->port_id, txq->queue_id);
607
608         /*
609          * The last descriptor to clean is done, so that means all the
610          * descriptors from the last descriptor that was cleaned
611          * up to the last descriptor with the RS bit set
612          * are done. Only reset the threshold descriptor.
613          */
614         txr[desc_to_clean_to].wb.status = 0;
615
616         /* Update the txq to reflect the last descriptor that was cleaned */
617         txq->last_desc_cleaned = desc_to_clean_to;
618         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
619
620         /* No Error */
621         return 0;
622 }
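/*
 * Worked example of the wrap handling above, assuming nb_tx_desc = 1024,
 * tx_rs_thresh = 32 and last_desc_cleaned = 1016: desc_to_clean_to becomes
 * 24 (1048 - 1024), and assuming sw_ring[24].last_id is also 24, a DD report
 * on that descriptor gives nb_tx_to_clean = (1024 - 1016) + 24 = 32 and
 * last_desc_cleaned advances to 24.
 */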
623
624 uint16_t
625 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
626                 uint16_t nb_pkts)
627 {
628         struct ixgbe_tx_queue *txq;
629         struct ixgbe_tx_entry *sw_ring;
630         struct ixgbe_tx_entry *txe, *txn;
631         volatile union ixgbe_adv_tx_desc *txr;
632         volatile union ixgbe_adv_tx_desc *txd, *txp;
633         struct rte_mbuf     *tx_pkt;
634         struct rte_mbuf     *m_seg;
635         uint64_t buf_dma_addr;
636         uint32_t olinfo_status;
637         uint32_t cmd_type_len;
638         uint32_t pkt_len;
639         uint16_t slen;
640         uint64_t ol_flags;
641         uint16_t tx_id;
642         uint16_t tx_last;
643         uint16_t nb_tx;
644         uint16_t nb_used;
645         uint64_t tx_ol_req;
646         uint32_t ctx = 0;
647         uint32_t new_ctx;
648         union ixgbe_tx_offload tx_offload;
649 #ifdef RTE_LIB_SECURITY
650         uint8_t use_ipsec;
651 #endif
652
653         tx_offload.data[0] = 0;
654         tx_offload.data[1] = 0;
655         txq = tx_queue;
656         sw_ring = txq->sw_ring;
657         txr     = txq->tx_ring;
658         tx_id   = txq->tx_tail;
659         txe = &sw_ring[tx_id];
660         txp = NULL;
661
662         /* Determine if the descriptor ring needs to be cleaned. */
663         if (txq->nb_tx_free < txq->tx_free_thresh)
664                 ixgbe_xmit_cleanup(txq);
665
666         rte_prefetch0(&txe->mbuf->pool);
667
668         /* TX loop */
669         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
670                 new_ctx = 0;
671                 tx_pkt = *tx_pkts++;
672                 pkt_len = tx_pkt->pkt_len;
673
674                 /*
675                  * Determine how many (if any) context descriptors
676                  * are needed for offload functionality.
677                  */
678                 ol_flags = tx_pkt->ol_flags;
679 #ifdef RTE_LIB_SECURITY
680                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
681 #endif
682
683                 /* If hardware offload is required */
684                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
685                 if (tx_ol_req) {
686                         tx_offload.l2_len = tx_pkt->l2_len;
687                         tx_offload.l3_len = tx_pkt->l3_len;
688                         tx_offload.l4_len = tx_pkt->l4_len;
689                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
690                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
691                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
692                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
693 #ifdef RTE_LIB_SECURITY
694                         if (use_ipsec) {
695                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
696                                         (union ixgbe_crypto_tx_desc_md *)
697                                                 rte_security_dynfield(tx_pkt);
698                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
699                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
700                         }
701 #endif
702
703                         /* Decide whether a new context must be built or an existing one can be reused. */
704                         ctx = what_advctx_update(txq, tx_ol_req,
705                                 tx_offload);
706                         /* Only allocate a context descriptor if required */
707                         new_ctx = (ctx == IXGBE_CTX_NUM);
708                         ctx = txq->ctx_curr;
709                 }
710
711                 /*
712                  * Keep track of how many descriptors are used in this iteration.
713                  * This is always the number of segments plus the number of
714                  * context descriptors required to transmit the packet.
715                  */
716                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
717
718                 if (txp != NULL &&
719                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
720                         /* set RS on the previous packet in the burst */
721                         txp->read.cmd_type_len |=
722                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
723
724                 /*
725                  * The number of descriptors that must be allocated for a
726                  * packet is the number of segments of that packet, plus 1
727                  * Context Descriptor for the hardware offload, if any.
728                  * Determine the last TX descriptor to allocate in the TX ring
729                  * for the packet, starting from the current position (tx_id)
730                  * in the ring.
731                  */
732                 tx_last = (uint16_t) (tx_id + nb_used - 1);
733
734                 /* Circular ring */
735                 if (tx_last >= txq->nb_tx_desc)
736                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
737
738                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
739                            " tx_first=%u tx_last=%u",
740                            (unsigned) txq->port_id,
741                            (unsigned) txq->queue_id,
742                            (unsigned) pkt_len,
743                            (unsigned) tx_id,
744                            (unsigned) tx_last);
745
746                 /*
747                  * Make sure there are enough TX descriptors available to
748                  * transmit the entire packet.
749                  * nb_used better be less than or equal to txq->tx_rs_thresh
750                  */
751                 if (nb_used > txq->nb_tx_free) {
752                         PMD_TX_LOG(DEBUG,
753                                    "Not enough free TX descriptors "
754                                    "nb_used=%4u nb_free=%4u "
755                                    "(port=%d queue=%d)",
756                                    nb_used, txq->nb_tx_free,
757                                    txq->port_id, txq->queue_id);
758
759                         if (ixgbe_xmit_cleanup(txq) != 0) {
760                                 /* Could not clean any descriptors */
761                                 if (nb_tx == 0)
762                                         return 0;
763                                 goto end_of_tx;
764                         }
765
766                         /* nb_used better be <= txq->tx_rs_thresh */
767                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
768                                 PMD_TX_LOG(DEBUG,
769                                            "The number of descriptors needed to "
770                                            "transmit the packet exceeds the "
771                                            "RS bit threshold. This will impact "
772                                            "performance. "
773                                            "nb_used=%4u nb_free=%4u "
774                                            "tx_rs_thresh=%4u. "
775                                            "(port=%d queue=%d)",
776                                            nb_used, txq->nb_tx_free,
777                                            txq->tx_rs_thresh,
778                                            txq->port_id, txq->queue_id);
779                                 /*
780                                  * Loop here until there are enough TX
781                                  * descriptors or until the ring cannot be
782                                  * cleaned.
783                                  */
784                                 while (nb_used > txq->nb_tx_free) {
785                                         if (ixgbe_xmit_cleanup(txq) != 0) {
786                                                 /*
787                                                  * Could not clean any
788                                                  * descriptors
789                                                  */
790                                                 if (nb_tx == 0)
791                                                         return 0;
792                                                 goto end_of_tx;
793                                         }
794                                 }
795                         }
796                 }
797
798                 /*
799                  * By now there are enough free TX descriptors to transmit
800                  * the packet.
801                  */
802
803                 /*
804                  * Set common flags of all TX Data Descriptors.
805                  *
806                  * The following bits must be set in all Data Descriptors:
807                  *   - IXGBE_ADVTXD_DTYP_DATA
808                  *   - IXGBE_ADVTXD_DCMD_DEXT
809                  *
810                  * The following bits must be set in the first Data Descriptor
811                  * and are ignored in the other ones:
812                  *   - IXGBE_ADVTXD_DCMD_IFCS
813                  *   - IXGBE_ADVTXD_MAC_1588
814                  *   - IXGBE_ADVTXD_DCMD_VLE
815                  *
816                  * The following bits must only be set in the last Data
817                  * Descriptor:
818                  *   - IXGBE_TXD_CMD_EOP
819                  *
820                  * The following bits can be set in any Data Descriptor, but
821                  * are only set in the last Data Descriptor:
822                  *   - IXGBE_TXD_CMD_RS
823                  */
824                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
825                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
826
827 #ifdef RTE_LIBRTE_IEEE1588
828                 if (ol_flags & PKT_TX_IEEE1588_TMST)
829                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
830 #endif
831
832                 olinfo_status = 0;
833                 if (tx_ol_req) {
834
835                         if (ol_flags & PKT_TX_TCP_SEG) {
836                                 /* when TSO is on, the paylen in the descriptor is
837                                  * not the packet length but the TCP payload length */
838                                 pkt_len -= (tx_offload.l2_len +
839                                         tx_offload.l3_len + tx_offload.l4_len);
840                         }
841
842                         /*
843                          * Setup the TX Advanced Context Descriptor if required
844                          */
845                         if (new_ctx) {
846                                 volatile struct ixgbe_adv_tx_context_desc *
847                                     ctx_txd;
848
849                                 ctx_txd = (volatile struct
850                                     ixgbe_adv_tx_context_desc *)
851                                     &txr[tx_id];
852
853                                 txn = &sw_ring[txe->next_id];
854                                 rte_prefetch0(&txn->mbuf->pool);
855
856                                 if (txe->mbuf != NULL) {
857                                         rte_pktmbuf_free_seg(txe->mbuf);
858                                         txe->mbuf = NULL;
859                                 }
860
861                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
862                                         tx_offload,
863                                         rte_security_dynfield(tx_pkt));
864
865                                 txe->last_id = tx_last;
866                                 tx_id = txe->next_id;
867                                 txe = txn;
868                         }
869
870                         /*
871                          * Set up the TX Advanced Data Descriptor.
872                          * This path is taken whether the context
873                          * descriptor was newly built or reused.
874                          */
875                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
876                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
877                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
878                 }
879
880                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
881 #ifdef RTE_LIB_SECURITY
882                 if (use_ipsec)
883                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
884 #endif
885
886                 m_seg = tx_pkt;
887                 do {
888                         txd = &txr[tx_id];
889                         txn = &sw_ring[txe->next_id];
890                         rte_prefetch0(&txn->mbuf->pool);
891
892                         if (txe->mbuf != NULL)
893                                 rte_pktmbuf_free_seg(txe->mbuf);
894                         txe->mbuf = m_seg;
895
896                         /*
897                          * Set up Transmit Data Descriptor.
898                          */
899                         slen = m_seg->data_len;
900                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
901                         txd->read.buffer_addr =
902                                 rte_cpu_to_le_64(buf_dma_addr);
903                         txd->read.cmd_type_len =
904                                 rte_cpu_to_le_32(cmd_type_len | slen);
905                         txd->read.olinfo_status =
906                                 rte_cpu_to_le_32(olinfo_status);
907                         txe->last_id = tx_last;
908                         tx_id = txe->next_id;
909                         txe = txn;
910                         m_seg = m_seg->next;
911                 } while (m_seg != NULL);
912
913                 /*
914                  * The last packet data descriptor needs End Of Packet (EOP)
915                  */
916                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
917                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
918                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
919
920                 /* Set RS bit only on threshold packets' last descriptor */
921                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
922                         PMD_TX_LOG(DEBUG,
923                                    "Setting RS bit on TXD id="
924                                    "%4u (port=%d queue=%d)",
925                                    tx_last, txq->port_id, txq->queue_id);
926
927                         cmd_type_len |= IXGBE_TXD_CMD_RS;
928
929                         /* Update txq RS bit counters */
930                         txq->nb_tx_used = 0;
931                         txp = NULL;
932                 } else
933                         txp = txd;
934
935                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
936         }
937
938 end_of_tx:
939         /* set RS on last packet in the burst */
940         if (txp != NULL)
941                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
942
943         rte_wmb();
944
945         /*
946          * Set the Transmit Descriptor Tail (TDT)
947          */
948         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
949                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
950                    (unsigned) tx_id, (unsigned) nb_tx);
951         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
952         txq->tx_tail = tx_id;
953
954         return nb_tx;
955 }
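/*
 * Note on the txp pointer used above: RS is only requested roughly once per
 * tx_rs_thresh descriptors, so most packets leave txp pointing at their last
 * data descriptor. When the burst ends, or when the next packet would cross
 * the threshold, RS is set on that remembered descriptor so the hardware
 * still writes back completion status for the whole group.
 */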
956
957 /*********************************************************************
958  *
959  *  TX prep functions
960  *
961  **********************************************************************/
962 uint16_t
963 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
964 {
965         int i, ret;
966         uint64_t ol_flags;
967         struct rte_mbuf *m;
968         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
969
970         for (i = 0; i < nb_pkts; i++) {
971                 m = tx_pkts[i];
972                 ol_flags = m->ol_flags;
973
974                 /**
975                  * Check whether the packet meets the limit on the number
976                  * of segments.
977                  * NOTE: for ixgbe the limit is always (40 - WTHRESH), for
978                  *       both TSO and non-TSO packets.
979                  */
980
981                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
982                         rte_errno = EINVAL;
983                         return i;
984                 }
985
986                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
987                         rte_errno = ENOTSUP;
988                         return i;
989                 }
990
991                 /* check the size of packet */
992                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
993                         rte_errno = EINVAL;
994                         return i;
995                 }
996
997 #ifdef RTE_ETHDEV_DEBUG_TX
998                 ret = rte_validate_tx_offload(m);
999                 if (ret != 0) {
1000                         rte_errno = -ret;
1001                         return i;
1002                 }
1003 #endif
1004                 ret = rte_net_intel_cksum_prepare(m);
1005                 if (ret != 0) {
1006                         rte_errno = -ret;
1007                         return i;
1008                 }
1009         }
1010
1011         return i;
1012 }
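/*
 * ixgbe_prep_pkts() is used as the PMD's tx_pkt_prepare callback, so
 * applications normally reach it through the generic ethdev API. A minimal
 * usage sketch (illustrative only; port_id, queue_id, pkts and nb_pkts are
 * the caller's variables and handle_bad_packet() is a placeholder for the
 * application's own error handling):
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *     if (nb_prep != nb_pkts)
 *         handle_bad_packet(pkts[nb_prep]);  // rte_errno: EINVAL or ENOTSUP
 *     uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */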
1013
1014 /*********************************************************************
1015  *
1016  *  RX functions
1017  *
1018  **********************************************************************/
1019
1020 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1021 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1022 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1023 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1024 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1025 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1029 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1030 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1031 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1032 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1033 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1037 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1041 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1045 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1053
1054 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1077
1078 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
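/*
 * These values follow the packet-type encoding used to index the tables
 * below (derived from the RX descriptor's packet type field): the low nibble
 * flags the IP headers seen (0x01 IPv4, 0x03 IPv4 with options, 0x04 IPv6,
 * 0x0C IPv6 with extensions; combinations such as 0x05 mean IPv4 followed by
 * IPv6 for IP-in-IP), bits 4-6 encode the L4 protocol (0x10 TCP, 0x20 UDP,
 * 0x40 SCTP), and in the tunnel table bit 7 distinguishes VXLAN (set) from
 * NVGRE (clear).
 */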
1101
1102 /**
1103  * Two different tables are used for normal packets and tunnel packets
1104  * to save space.
1105  */
1106 const uint32_t
1107         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1108         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1109         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1110                 RTE_PTYPE_L3_IPV4,
1111         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1112                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1113         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1114                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1115         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1116                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1117         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1118                 RTE_PTYPE_L3_IPV4_EXT,
1119         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1120                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1121         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1122                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1123         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1124                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1125         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1126                 RTE_PTYPE_L3_IPV6,
1127         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1128                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1129         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1130                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1131         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1132                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1133         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1134                 RTE_PTYPE_L3_IPV6_EXT,
1135         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1136                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1137         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1138                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1139         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1140                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1141         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1142                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1143                 RTE_PTYPE_INNER_L3_IPV6,
1144         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1146                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1147         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1148                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1149                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1150         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1152                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1153         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1154                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1155                 RTE_PTYPE_INNER_L3_IPV6,
1156         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1158                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1159         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1160                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1161                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1162         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1164                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1165         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1166                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1167                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1168         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1169                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1170                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1171         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1172                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1173                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1174         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1175                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1176                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1177         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1178                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1179                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1180         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1181                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1183         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1184                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1185                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1186         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1187                 RTE_PTYPE_L2_ETHER |
1188                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1189                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1190 };
1191
1192 const uint32_t
1193         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1194         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1195                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1196                 RTE_PTYPE_INNER_L2_ETHER,
1197         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1198                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1199                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1200         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1201                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1202                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1203         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1204                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1205                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1206         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1207                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1209         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1210                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1211                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1212         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1213                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1214                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1215         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1216                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1217                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1218                 RTE_PTYPE_INNER_L4_TCP,
1219         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1220                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1222                 RTE_PTYPE_INNER_L4_TCP,
1223         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1224                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1225                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1226         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1227                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1228                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1229                 RTE_PTYPE_INNER_L4_TCP,
1230         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1231                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1232                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1233                 RTE_PTYPE_INNER_L3_IPV4,
1234         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1235                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1236                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1237                 RTE_PTYPE_INNER_L4_UDP,
1238         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1239                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1240                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1241                 RTE_PTYPE_INNER_L4_UDP,
1242         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1243                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1244                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1245                 RTE_PTYPE_INNER_L4_SCTP,
1246         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1247                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1249         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1250                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1251                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1252                 RTE_PTYPE_INNER_L4_UDP,
1253         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1254                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1255                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1256                 RTE_PTYPE_INNER_L4_SCTP,
1257         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1258                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1259                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1260                 RTE_PTYPE_INNER_L3_IPV4,
1261         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1262                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1263                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1264                 RTE_PTYPE_INNER_L4_SCTP,
1265         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1266                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1267                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1268                 RTE_PTYPE_INNER_L4_SCTP,
1269         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1270                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1271                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1272                 RTE_PTYPE_INNER_L4_TCP,
1273         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1274                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1275                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1276                 RTE_PTYPE_INNER_L4_UDP,
1277
1278         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1279                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1281         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1282                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                 RTE_PTYPE_INNER_L3_IPV4,
1285         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1286                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1287                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1288                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1289         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1290                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                 RTE_PTYPE_INNER_L3_IPV6,
1293         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1294                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                 RTE_PTYPE_INNER_L3_IPV4,
1297         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1298                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1301         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1302                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                 RTE_PTYPE_INNER_L3_IPV4,
1305         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1306                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1309         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1310                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1313         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1314                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1315                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1316                 RTE_PTYPE_INNER_L3_IPV4,
1317         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1318                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1321         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1322                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1323                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1324                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1325         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1326                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1329         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1330                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1333         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1334                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1335                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1336                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1337         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1338                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1339                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1340                 RTE_PTYPE_INNER_L3_IPV4,
1341         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1342                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1343                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1344                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1345         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1346                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1347                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1348                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1349         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1350                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1351                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1352                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1353         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1354                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1355                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1356                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1357         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1358                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1359                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1360                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1361         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1362                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1363                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1364                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1365         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1366                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1367                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1368                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1369 };
1370
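/*
 * Comparison callback used by the power-monitor machinery: it is invoked
 * with the value read from the monitored status word and returns non-zero
 * (abort entering the power-optimized state) when the DD bit is already
 * set, 0 otherwise.
 */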
1371 static int
1372 ixgbe_monitor_callback(const uint64_t value,
1373                 const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
1374 {
1375         const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1376         /*
1377          * we expect the DD bit to be set to 1 if this descriptor was already
1378          * written to.
1379          */
1380         return (value & m) == m ? -1 : 0;
1381 }
1382
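/*
 * Fill in a power-monitor condition for the given Rx queue: the core will
 * wait on the status word of the next descriptor to be written back.
 * Typically reached through rte_eth_get_monitor_addr(), with the resulting
 * condition handed to rte_power_monitor() by the power-management code.
 */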
1383 int
1384 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1385 {
1386         volatile union ixgbe_adv_rx_desc *rxdp;
1387         struct ixgbe_rx_queue *rxq = rx_queue;
1388         uint16_t desc;
1389
1390         desc = rxq->rx_tail;
1391         rxdp = &rxq->rx_ring[desc];
1392         /* watch for changes in status bit */
1393         pmc->addr = &rxdp->wb.upper.status_error;
1394
1395         /* comparison callback */
1396         pmc->fn = ixgbe_monitor_callback;
1397
1398         /* the registers are 32-bit */
1399         pmc->size = sizeof(uint32_t);
1400
1401         return 0;
1402 }
1403
1404 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1405 static inline uint32_t
1406 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1407 {
1408
1409         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1410                 return RTE_PTYPE_UNKNOWN;
1411
1412         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1413
1414         /* For tunnelled packets */
1415         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1416                 /* Clear the tunnel bit so the smaller tunnel ptype table can be indexed. */
1417                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1418                 return ptype_table_tn[pkt_info];
1419         }
1420
1421         /**
1422          * For x550, if the packet is not tunnelled,
1423          * the tunnel type bits should be 0.
1424          * Reuse the 82599 mask.
1425          */
1426         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1427
1428         return ptype_table[pkt_info];
1429 }
1430
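/*
 * Translate the RSS-type nibble of the descriptor's packet info into mbuf
 * offload flags (PKT_RX_RSS_HASH or PKT_RX_FDIR). With IEEE 1588 enabled,
 * ETQF-matched PTP packets additionally get PKT_RX_IEEE1588_PTP.
 */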
1431 static inline uint64_t
1432 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1433 {
1434         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1435                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1436                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1437                 PKT_RX_RSS_HASH, 0, 0, 0,
1438                 0, 0, 0,  PKT_RX_FDIR,
1439         };
1440 #ifdef RTE_LIBRTE_IEEE1588
1441         static uint64_t ip_pkt_etqf_map[8] = {
1442                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1443                 0, 0, 0, 0,
1444         };
1445
1446         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1447                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1448                                 ip_rss_types_map[pkt_info & 0XF];
1449         else
1450                 return ip_rss_types_map[pkt_info & 0XF];
1451 #else
1452         return ip_rss_types_map[pkt_info & 0XF];
1453 #endif
1454 }
1455
1456 static inline uint64_t
1457 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1458 {
1459         uint64_t pkt_flags;
1460
1461         /*
1462          * Check only whether a VLAN is present.
1463          * Do not check whether the L3/L4 Rx checksum was computed by the NIC;
1464          * that can be derived from the rte_eth_rxmode.offloads flags.
1465          */
1466         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1467
1468 #ifdef RTE_LIBRTE_IEEE1588
1469         if (rx_status & IXGBE_RXD_STAT_TMST)
1470                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1471 #endif
1472         return pkt_flags;
1473 }
1474
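/*
 * Translate descriptor error/status bits into checksum-related mbuf flags:
 * IP/L4 checksum good or bad, outer-IP checksum error, and (when built with
 * RTE_LIB_SECURITY) inline-IPsec status. Also works around hardware that
 * mis-reports UDP packets carrying a zero checksum.
 */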
1475 static inline uint64_t
1476 rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1477                            uint8_t rx_udp_csum_zero_err)
1478 {
1479         uint64_t pkt_flags;
1480
1481         /*
1482          * Bit 31: IPE, IPv4 checksum error
1483          * Bit 30: L4I, L4 integrity error
1484          */
1485         static uint64_t error_to_pkt_flags_map[4] = {
1486                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1487                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1488                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1489                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1490         };
1491         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1492                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1493
1494         /* Mask out the bad UDP checksum error if the hardware wrongly flags
1495          * UDP packets that carry a zero checksum, so that the application
1496          * has to recompute the checksum itself if needed.
1497          */
1498         if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1499             (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1500             rx_udp_csum_zero_err)
1501                 pkt_flags &= ~PKT_RX_L4_CKSUM_BAD;
1502
1503         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1504             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1505                 pkt_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
1506         }
1507
1508 #ifdef RTE_LIB_SECURITY
1509         if (rx_status & IXGBE_RXD_STAT_SECP) {
1510                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1511                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1512                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1513         }
1514 #endif
1515
1516         return pkt_flags;
1517 }
1518
1519 /*
1520  * LOOK_AHEAD defines how many desc statuses to check beyond the
1521  * current descriptor.
1522  * It must be a pound define for optimal performance.
1523  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1524  * function only works with LOOK_AHEAD=8.
1525  */
1526 #define LOOK_AHEAD 8
1527 #if (LOOK_AHEAD != 8)
1528 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1529 #endif
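/*
 * Scan up to RTE_PMD_IXGBE_RX_MAX_BURST descriptors, LOOK_AHEAD at a time,
 * convert the completed ones into mbufs and park them in rxq->rx_stage.
 * The sw_ring slots of the staged mbufs are cleared so that queue release
 * can tell them apart. Returns the number of descriptors that were ready.
 */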
1530 static inline int
1531 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1532 {
1533         volatile union ixgbe_adv_rx_desc *rxdp;
1534         struct ixgbe_rx_entry *rxep;
1535         struct rte_mbuf *mb;
1536         uint16_t pkt_len;
1537         uint64_t pkt_flags;
1538         int nb_dd;
1539         uint32_t s[LOOK_AHEAD];
1540         uint32_t pkt_info[LOOK_AHEAD];
1541         int i, j, nb_rx = 0;
1542         uint32_t status;
1543         uint64_t vlan_flags = rxq->vlan_flags;
1544
1545         /* get references to current descriptor and S/W ring entry */
1546         rxdp = &rxq->rx_ring[rxq->rx_tail];
1547         rxep = &rxq->sw_ring[rxq->rx_tail];
1548
1549         status = rxdp->wb.upper.status_error;
1550         /* check to make sure there is at least 1 packet to receive */
1551         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1552                 return 0;
1553
1554         /*
1555          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1556          * reference packets that are ready to be received.
1557          */
1558         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1559              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1560                 /* Read desc statuses; rte_smp_rmb() below orders them before the payload reads */
1561                 for (j = 0; j < LOOK_AHEAD; j++)
1562                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1563
1564                 rte_smp_rmb();
1565
1566                 /* Compute how many status bits were set */
1567                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1568                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1569                         ;
1570
1571                 for (j = 0; j < nb_dd; j++)
1572                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1573                                                        lo_dword.data);
1574
1575                 nb_rx += nb_dd;
1576
1577                 /* Translate descriptor info to mbuf format */
1578                 for (j = 0; j < nb_dd; ++j) {
1579                         mb = rxep[j].mbuf;
1580                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1581                                   rxq->crc_len;
1582                         mb->data_len = pkt_len;
1583                         mb->pkt_len = pkt_len;
1584                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1585
1586                         /* convert descriptor fields to rte mbuf flags */
1587                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1588                                 vlan_flags);
1589                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
1590                                         (uint16_t)pkt_info[j],
1591                                         rxq->rx_udp_csum_zero_err);
1592                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1593                                         ((uint16_t)pkt_info[j]);
1594                         mb->ol_flags = pkt_flags;
1595                         mb->packet_type =
1596                                 ixgbe_rxd_pkt_info_to_pkt_type
1597                                         (pkt_info[j], rxq->pkt_type_mask);
1598
1599                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1600                                 mb->hash.rss = rte_le_to_cpu_32(
1601                                     rxdp[j].wb.lower.hi_dword.rss);
1602                         else if (pkt_flags & PKT_RX_FDIR) {
1603                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1604                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1605                                     IXGBE_ATR_HASH_MASK;
1606                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1607                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1608                         }
1609                 }
1610
1611                 /* Move mbuf pointers from the S/W ring to the stage */
1612                 for (j = 0; j < LOOK_AHEAD; ++j) {
1613                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1614                 }
1615
1616                 /* stop if this group was not fully completed: no more packets are ready */
1617                 if (nb_dd != LOOK_AHEAD)
1618                         break;
1619         }
1620
1621         /* clear software ring entries so we can cleanup correctly */
1622         for (i = 0; i < nb_rx; ++i) {
1623                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1624         }
1625
1626
1627         return nb_rx;
1628 }
1629
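/*
 * Bulk-allocate rx_free_thresh mbufs into the S/W ring starting right after
 * the previous free trigger and program their DMA addresses into the
 * corresponding descriptors. When reset_mbuf is set, the static mbuf fields
 * (port) are re-initialized as well. Returns -ENOMEM if the mempool is empty.
 */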
1630 static inline int
1631 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1632 {
1633         volatile union ixgbe_adv_rx_desc *rxdp;
1634         struct ixgbe_rx_entry *rxep;
1635         struct rte_mbuf *mb;
1636         uint16_t alloc_idx;
1637         __le64 dma_addr;
1638         int diag, i;
1639
1640         /* allocate buffers in bulk directly into the S/W ring */
1641         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1642         rxep = &rxq->sw_ring[alloc_idx];
1643         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1644                                     rxq->rx_free_thresh);
1645         if (unlikely(diag != 0))
1646                 return -ENOMEM;
1647
1648         rxdp = &rxq->rx_ring[alloc_idx];
1649         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1650                 /* populate the static rte mbuf fields */
1651                 mb = rxep[i].mbuf;
1652                 if (reset_mbuf) {
1653                         mb->port = rxq->port_id;
1654                 }
1655
1656                 rte_mbuf_refcnt_set(mb, 1);
1657                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1658
1659                 /* populate the descriptors */
1660                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1661                 rxdp[i].read.hdr_addr = 0;
1662                 rxdp[i].read.pkt_addr = dma_addr;
1663         }
1664
1665         /* update state of internal queue structure */
1666         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1667         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1668                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1669
1670         /* no errors */
1671         return 0;
1672 }
1673
1674 static inline uint16_t
1675 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1676                          uint16_t nb_pkts)
1677 {
1678         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1679         int i;
1680
1681         /* how many packets are ready to return? */
1682         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1683
1684         /* copy mbuf pointers to the application's packet list */
1685         for (i = 0; i < nb_pkts; ++i)
1686                 rx_pkts[i] = stage[i];
1687
1688         /* update internal queue state */
1689         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1690         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1691
1692         return nb_pkts;
1693 }
1694
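/*
 * Bulk-allocation receive path, limited to RTE_PMD_IXGBE_RX_MAX_BURST
 * packets per call: drain previously staged mbufs first, then scan the
 * H/W ring, replenish descriptors once the free trigger is crossed and
 * update the RDT register.
 */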
1695 static inline uint16_t
1696 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1697              uint16_t nb_pkts)
1698 {
1699         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1700         uint16_t nb_rx = 0;
1701
1702         /* Any previously recv'd pkts will be returned from the Rx stage */
1703         if (rxq->rx_nb_avail)
1704                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1705
1706         /* Scan the H/W ring for packets to receive */
1707         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1708
1709         /* update internal queue state */
1710         rxq->rx_next_avail = 0;
1711         rxq->rx_nb_avail = nb_rx;
1712         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1713
1714         /* if required, allocate new buffers to replenish descriptors */
1715         if (rxq->rx_tail > rxq->rx_free_trigger) {
1716                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1717
1718                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1719                         int i, j;
1720
1721                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1722                                    "queue_id=%u", (unsigned) rxq->port_id,
1723                                    (unsigned) rxq->queue_id);
1724
1725                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1726                                 rxq->rx_free_thresh;
1727
1728                         /*
1729                          * Need to rewind any previous receives if we cannot
1730                          * allocate new buffers to replenish the old ones.
1731                          */
1732                         rxq->rx_nb_avail = 0;
1733                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1734                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1735                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1736
1737                         return 0;
1738                 }
1739
1740                 /* update tail pointer */
1741                 rte_wmb();
1742                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1743                                             cur_free_trigger);
1744         }
1745
1746         if (rxq->rx_tail >= rxq->nb_rx_desc)
1747                 rxq->rx_tail = 0;
1748
1749         /* received any packets this loop? */
1750         if (rxq->rx_nb_avail)
1751                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1752
1753         return 0;
1754 }
1755
1756 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1757 uint16_t
1758 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1759                            uint16_t nb_pkts)
1760 {
1761         uint16_t nb_rx;
1762
1763         if (unlikely(nb_pkts == 0))
1764                 return 0;
1765
1766         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1767                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1768
1769         /* request is relatively large, chunk it up */
1770         nb_rx = 0;
1771         while (nb_pkts) {
1772                 uint16_t ret, n;
1773
1774                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1775                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1776                 nb_rx = (uint16_t)(nb_rx + ret);
1777                 nb_pkts = (uint16_t)(nb_pkts - ret);
1778                 if (ret < n)
1779                         break;
1780         }
1781
1782         return nb_rx;
1783 }
1784
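/*
 * Default scalar receive handler: one descriptor per packet (no scatter,
 * no LRO). Each received mbuf is immediately replaced by a freshly
 * allocated one before being handed to the application.
 */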
1785 uint16_t
1786 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1787                 uint16_t nb_pkts)
1788 {
1789         struct ixgbe_rx_queue *rxq;
1790         volatile union ixgbe_adv_rx_desc *rx_ring;
1791         volatile union ixgbe_adv_rx_desc *rxdp;
1792         struct ixgbe_rx_entry *sw_ring;
1793         struct ixgbe_rx_entry *rxe;
1794         struct rte_mbuf *rxm;
1795         struct rte_mbuf *nmb;
1796         union ixgbe_adv_rx_desc rxd;
1797         uint64_t dma_addr;
1798         uint32_t staterr;
1799         uint32_t pkt_info;
1800         uint16_t pkt_len;
1801         uint16_t rx_id;
1802         uint16_t nb_rx;
1803         uint16_t nb_hold;
1804         uint64_t pkt_flags;
1805         uint64_t vlan_flags;
1806
1807         nb_rx = 0;
1808         nb_hold = 0;
1809         rxq = rx_queue;
1810         rx_id = rxq->rx_tail;
1811         rx_ring = rxq->rx_ring;
1812         sw_ring = rxq->sw_ring;
1813         vlan_flags = rxq->vlan_flags;
1814         while (nb_rx < nb_pkts) {
1815                 /*
1816                  * The order of operations here is important as the DD status
1817                  * bit must not be read after any other descriptor fields.
1818                  * rx_ring and rxdp are pointing to volatile data so the order
1819                  * of accesses cannot be reordered by the compiler. If they were
1820                  * not volatile, they could be reordered which could lead to
1821                  * using invalid descriptor fields when read from rxd.
1822                  */
1823                 rxdp = &rx_ring[rx_id];
1824                 staterr = rxdp->wb.upper.status_error;
1825                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1826                         break;
1827                 rxd = *rxdp;
1828
1829                 /*
1830                  * End of packet.
1831                  *
1832                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1833                  * is likely to be invalid and to be dropped by the various
1834                  * validation checks performed by the network stack.
1835                  *
1836                  * Allocate a new mbuf to replenish the RX ring descriptor.
1837                  * If the allocation fails:
1838                  *    - arrange for that RX descriptor to be the first one
1839                  *      being parsed the next time the receive function is
1840                  *      invoked [on the same queue].
1841                  *
1842                  *    - Stop parsing the RX ring and return immediately.
1843                  *
1844                  * This policy does not drop the packet received in the RX
1845                  * descriptor for which the allocation of a new mbuf failed.
1846                  * Thus, it allows that packet to be retrieved later, once
1847                  * mbufs have been freed in the meantime.
1848                  * As a side effect, holding RX descriptors instead of
1849                  * systematically giving them back to the NIC may lead to
1850                  * RX ring exhaustion situations.
1851                  * However, the NIC can gracefully prevent such situations
1852                  * from happening by sending specific "back-pressure" flow
1853                  * control frames to its peer(s).
1854                  */
1855                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1856                            "ext_err_stat=0x%08x pkt_len=%u",
1857                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1858                            (unsigned) rx_id, (unsigned) staterr,
1859                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1860
1861                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1862                 if (nmb == NULL) {
1863                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1864                                    "queue_id=%u", (unsigned) rxq->port_id,
1865                                    (unsigned) rxq->queue_id);
1866                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1867                         break;
1868                 }
1869
1870                 nb_hold++;
1871                 rxe = &sw_ring[rx_id];
1872                 rx_id++;
1873                 if (rx_id == rxq->nb_rx_desc)
1874                         rx_id = 0;
1875
1876                 /* Prefetch next mbuf while processing current one. */
1877                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1878
1879                 /*
1880                  * When next RX descriptor is on a cache-line boundary,
1881                  * prefetch the next 4 RX descriptors and the next 8 pointers
1882                  * to mbufs.
1883                  */
1884                 if ((rx_id & 0x3) == 0) {
1885                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1886                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1887                 }
1888
1889                 rxm = rxe->mbuf;
1890                 rxe->mbuf = nmb;
1891                 dma_addr =
1892                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1893                 rxdp->read.hdr_addr = 0;
1894                 rxdp->read.pkt_addr = dma_addr;
1895
1896                 /*
1897                  * Initialize the returned mbuf.
1898                  * 1) setup generic mbuf fields:
1899                  *    - number of segments,
1900                  *    - next segment,
1901                  *    - packet length,
1902                  *    - RX port identifier.
1903                  * 2) integrate hardware offload data, if any:
1904                  *    - RSS flag & hash,
1905                  *    - IP checksum flag,
1906                  *    - VLAN TCI, if any,
1907                  *    - error flags.
1908                  */
1909                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1910                                       rxq->crc_len);
1911                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1912                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1913                 rxm->nb_segs = 1;
1914                 rxm->next = NULL;
1915                 rxm->pkt_len = pkt_len;
1916                 rxm->data_len = pkt_len;
1917                 rxm->port = rxq->port_id;
1918
1919                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1920                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1921                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1922
1923                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1924                 pkt_flags = pkt_flags |
1925                         rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
1926                                                    rxq->rx_udp_csum_zero_err);
1927                 pkt_flags = pkt_flags |
1928                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1929                 rxm->ol_flags = pkt_flags;
1930                 rxm->packet_type =
1931                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1932                                                        rxq->pkt_type_mask);
1933
1934                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1935                         rxm->hash.rss = rte_le_to_cpu_32(
1936                                                 rxd.wb.lower.hi_dword.rss);
1937                 else if (pkt_flags & PKT_RX_FDIR) {
1938                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1939                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1940                                         IXGBE_ATR_HASH_MASK;
1941                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1942                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1943                 }
1944                 /*
1945                  * Store the mbuf address into the next entry of the array
1946                  * of returned packets.
1947                  */
1948                 rx_pkts[nb_rx++] = rxm;
1949         }
1950         rxq->rx_tail = rx_id;
1951
1952         /*
1953          * If the number of free RX descriptors is greater than the RX free
1954          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1955          * register.
1956          * Update the RDT with the value of the last processed RX descriptor
1957          * minus 1, to guarantee that the RDT register is never equal to the
1958          * RDH register, which creates a "full" ring situation from the
1959          * hardware point of view...
1960          */
1961         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1962         if (nb_hold > rxq->rx_free_thresh) {
1963                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1964                            "nb_hold=%u nb_rx=%u",
1965                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1966                            (unsigned) rx_id, (unsigned) nb_hold,
1967                            (unsigned) nb_rx);
1968                 rx_id = (uint16_t) ((rx_id == 0) ?
1969                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1970                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1971                 nb_hold = 0;
1972         }
1973         rxq->nb_rx_hold = nb_hold;
1974         return nb_rx;
1975 }
1976
1977 /**
1978  * Detect an RSC descriptor: returns the RSC count field, non-zero for RSC descriptors.
1979  */
1980 static inline uint32_t
1981 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1982 {
1983         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1984                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1985 }
1986
1987 /**
1988  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1989  *
1990  * Fill the following info in the HEAD buffer of the Rx cluster:
1991  *    - RX port identifier
1992  *    - hardware offload data, if any:
1993  *      - RSS flag & hash
1994  *      - IP checksum flag
1995  *      - VLAN TCI, if any
1996  *      - error flags
1997  * @head HEAD of the packet cluster
1998  * @desc HW descriptor to get data from
1999  * @rxq Pointer to the Rx queue
2000  */
2001 static inline void
2002 ixgbe_fill_cluster_head_buf(
2003         struct rte_mbuf *head,
2004         union ixgbe_adv_rx_desc *desc,
2005         struct ixgbe_rx_queue *rxq,
2006         uint32_t staterr)
2007 {
2008         uint32_t pkt_info;
2009         uint64_t pkt_flags;
2010
2011         head->port = rxq->port_id;
2012
2013         /* The vlan_tci field is only valid when PKT_RX_VLAN is
2014          * set in the pkt_flags field.
2015          */
2016         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2017         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2018         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2019         pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2020                                                 rxq->rx_udp_csum_zero_err);
2021         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2022         head->ol_flags = pkt_flags;
2023         head->packet_type =
2024                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2025
2026         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2027                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2028         else if (pkt_flags & PKT_RX_FDIR) {
2029                 head->hash.fdir.hash =
2030                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2031                                                           & IXGBE_ATR_HASH_MASK;
2032                 head->hash.fdir.id =
2033                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2034         }
2035 }
2036
2037 /**
2038  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2039  *
2040  * @rx_queue Rx queue handle
2041  * @rx_pkts table of received packets
2042  * @nb_pkts size of rx_pkts table
2043  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2044  *
2045  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2046  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2047  *
2048  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2049  * 1) When non-EOP RSC completion arrives:
2050  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2051  *       segment's data length.
2052  *    b) Set the "next" pointer of the current segment to point to the segment
2053  *       at the NEXTP index.
2054  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2055  *       in the sw_rsc_ring.
2056  * 2) When EOP arrives we just update the cluster's total length and offload
2057  *    flags and deliver the cluster up to the upper layers. In our case - put it
2058  *    in the rx_pkts table.
2059  *
2060  * Returns the number of received packets/clusters (according to the "bulk
2061  * receive" interface).
2062  */
2063 static inline uint16_t
2064 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2065                     bool bulk_alloc)
2066 {
2067         struct ixgbe_rx_queue *rxq = rx_queue;
2068         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2069         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2070         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2071         uint16_t rx_id = rxq->rx_tail;
2072         uint16_t nb_rx = 0;
2073         uint16_t nb_hold = rxq->nb_rx_hold;
2074         uint16_t prev_id = rxq->rx_tail;
2075
2076         while (nb_rx < nb_pkts) {
2077                 bool eop;
2078                 struct ixgbe_rx_entry *rxe;
2079                 struct ixgbe_scattered_rx_entry *sc_entry;
2080                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2081                 struct ixgbe_rx_entry *next_rxe = NULL;
2082                 struct rte_mbuf *first_seg;
2083                 struct rte_mbuf *rxm;
2084                 struct rte_mbuf *nmb = NULL;
2085                 union ixgbe_adv_rx_desc rxd;
2086                 uint16_t data_len;
2087                 uint16_t next_id;
2088                 volatile union ixgbe_adv_rx_desc *rxdp;
2089                 uint32_t staterr;
2090
2091 next_desc:
2092                 /*
2093                  * The code in this whole file uses the volatile pointer to
2094                  * ensure the read ordering of the status and the rest of the
2095                  * descriptor fields (on the compiler level only!!!). This is so
2096          * UGLY - why not just use the compiler barrier instead? DPDK
2097                  * even has the rte_compiler_barrier() for that.
2098                  *
2099                  * But most importantly this is just wrong because this doesn't
2100                  * ensure memory ordering in a general case at all. For
2101                  * instance, DPDK is supposed to work on Power CPUs where
2102                  * compiler barrier may just not be enough!
2103                  *
2104                  * I tried to write only this function properly to have a
2105                  * starting point (as a part of an LRO/RSC series) but the
2106                  * compiler cursed at me when I tried to cast away the
2107                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2108                  * keeping it the way it is for now.
2109                  *
2110                  * The code in this file is broken in so many other places and
2111                  * will just not work on a big endian CPU anyway therefore the
2112                  * lines below will have to be revisited together with the rest
2113                  * of the ixgbe PMD.
2114                  *
2115                  * TODO:
2116                  *    - Get rid of "volatile" and let the compiler do its job.
2117                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2118                  *      memory ordering below.
2119                  */
2120                 rxdp = &rx_ring[rx_id];
2121                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2122
2123                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2124                         break;
2125
2126                 rxd = *rxdp;
2127
2128                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2129                                   "staterr=0x%x data_len=%u",
2130                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2131                            rte_le_to_cpu_16(rxd.wb.upper.length));
2132
2133                 if (!bulk_alloc) {
2134                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2135                         if (nmb == NULL) {
2136                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2137                                                   "port_id=%u queue_id=%u",
2138                                            rxq->port_id, rxq->queue_id);
2139
2140                                 rte_eth_devices[rxq->port_id].data->
2141                                                         rx_mbuf_alloc_failed++;
2142                                 break;
2143                         }
2144                 } else if (nb_hold > rxq->rx_free_thresh) {
2145                         uint16_t next_rdt = rxq->rx_free_trigger;
2146
2147                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2148                                 rte_wmb();
2149                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2150                                                         rxq->rdt_reg_addr,
2151                                                         next_rdt);
2152                                 nb_hold -= rxq->rx_free_thresh;
2153                         } else {
2154                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2155                                                   "port_id=%u queue_id=%u",
2156                                            rxq->port_id, rxq->queue_id);
2157
2158                                 rte_eth_devices[rxq->port_id].data->
2159                                                         rx_mbuf_alloc_failed++;
2160                                 break;
2161                         }
2162                 }
2163
2164                 nb_hold++;
2165                 rxe = &sw_ring[rx_id];
2166                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2167
2168                 next_id = rx_id + 1;
2169                 if (next_id == rxq->nb_rx_desc)
2170                         next_id = 0;
2171
2172                 /* Prefetch next mbuf while processing current one. */
2173                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2174
2175                 /*
2176                  * When next RX descriptor is on a cache-line boundary,
2177                  * prefetch the next 4 RX descriptors and the next 4 pointers
2178                  * to mbufs.
2179                  */
2180                 if ((next_id & 0x3) == 0) {
2181                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2182                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2183                 }
2184
2185                 rxm = rxe->mbuf;
2186
2187                 if (!bulk_alloc) {
2188                         __le64 dma =
2189                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2190                         /*
2191                          * Update RX descriptor with the physical address of the
2192                          * new data buffer of the newly allocated mbuf.
2193                          */
2194                         rxe->mbuf = nmb;
2195
2196                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2197                         rxdp->read.hdr_addr = 0;
2198                         rxdp->read.pkt_addr = dma;
2199                 } else
2200                         rxe->mbuf = NULL;
2201
2202                 /*
2203                  * Set data length & data buffer address of mbuf.
2204                  */
2205                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2206                 rxm->data_len = data_len;
2207
2208                 if (!eop) {
2209                         uint16_t nextp_id;
2210                         /*
2211                          * Get next descriptor index:
2212                          *  - For RSC it's in the NEXTP field.
2213                          *  - For a scattered packet - it's just the following
2214                          *    descriptor.
2215                          */
2216                         if (ixgbe_rsc_count(&rxd))
2217                                 nextp_id =
2218                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2219                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2220                         else
2221                                 nextp_id = next_id;
2222
2223                         next_sc_entry = &sw_sc_ring[nextp_id];
2224                         next_rxe = &sw_ring[nextp_id];
2225                         rte_ixgbe_prefetch(next_rxe);
2226                 }
2227
2228                 sc_entry = &sw_sc_ring[rx_id];
2229                 first_seg = sc_entry->fbuf;
2230                 sc_entry->fbuf = NULL;
2231
2232                 /*
2233                  * If this is the first buffer of the received packet,
2234                  * set the pointer to the first mbuf of the packet and
2235                  * initialize its context.
2236                  * Otherwise, update the total length and the number of segments
2237                  * of the current scattered packet, and update the pointer to
2238                  * the last mbuf of the current packet.
2239                  */
2240                 if (first_seg == NULL) {
2241                         first_seg = rxm;
2242                         first_seg->pkt_len = data_len;
2243                         first_seg->nb_segs = 1;
2244                 } else {
2245                         first_seg->pkt_len += data_len;
2246                         first_seg->nb_segs++;
2247                 }
2248
2249                 prev_id = rx_id;
2250                 rx_id = next_id;
2251
2252                 /*
2253                  * If this is not the last buffer of the received packet, update
2254                  * the pointer to the first mbuf at the NEXTP entry in the
2255                  * sw_sc_ring and continue to parse the RX ring.
2256                  */
2257                 if (!eop && next_rxe) {
2258                         rxm->next = next_rxe->mbuf;
2259                         next_sc_entry->fbuf = first_seg;
2260                         goto next_desc;
2261                 }
2262
2263                 /* Initialize the first mbuf of the returned packet */
2264                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2265
2266                 /*
2267                  * Deal with the case when HW CRC strip is disabled.
2268                  * That can't happen when LRO is enabled, but could still
2269                  * happen in scattered RX mode.
2270                  */
2271                 first_seg->pkt_len -= rxq->crc_len;
2272                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2273                         struct rte_mbuf *lp;
2274
2275                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2276                                 ;
2277
2278                         first_seg->nb_segs--;
2279                         lp->data_len -= rxq->crc_len - rxm->data_len;
2280                         lp->next = NULL;
2281                         rte_pktmbuf_free_seg(rxm);
2282                 } else
2283                         rxm->data_len -= rxq->crc_len;
2284
2285                 /* Prefetch data of first segment, if configured to do so. */
2286                 rte_packet_prefetch((char *)first_seg->buf_addr +
2287                         first_seg->data_off);
2288
2289                 /*
2290                  * Store the mbuf address into the next entry of the array
2291                  * of returned packets.
2292                  */
2293                 rx_pkts[nb_rx++] = first_seg;
2294         }
2295
2296         /*
2297          * Record index of the next RX descriptor to probe.
2298          */
2299         rxq->rx_tail = rx_id;
2300
2301         /*
2302          * If the number of free RX descriptors is greater than the RX free
2303          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2304          * register.
2305          * Update the RDT with the value of the last processed RX descriptor
2306          * minus 1, to guarantee that the RDT register is never equal to the
2307          * RDH register, which creates a "full" ring situation from the
2308          * hardware point of view...
2309          */
2310         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2311                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2312                            "nb_hold=%u nb_rx=%u",
2313                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2314
2315                 rte_wmb();
2316                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2317                 nb_hold = 0;
2318         }
2319
2320         rxq->nb_rx_hold = nb_hold;
2321         return nb_rx;
2322 }
2323
2324 uint16_t
2325 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2326                                  uint16_t nb_pkts)
2327 {
2328         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2329 }
2330
2331 uint16_t
2332 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2333                                uint16_t nb_pkts)
2334 {
2335         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2336 }
2337
2338 /*********************************************************************
2339  *
2340  *  Queue management functions
2341  *
2342  **********************************************************************/
2343
2344 static void __rte_cold
2345 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2346 {
2347         unsigned i;
2348
2349         if (txq->sw_ring != NULL) {
2350                 for (i = 0; i < txq->nb_tx_desc; i++) {
2351                         if (txq->sw_ring[i].mbuf != NULL) {
2352                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2353                                 txq->sw_ring[i].mbuf = NULL;
2354                         }
2355                 }
2356         }
2357 }
2358
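/* Free completed Tx mbufs on the full (offload-capable) Tx path: walk the
 * sw_ring starting at the entry after tx_tail, free the mbufs of completed
 * descriptors and count whole packets via last_id, invoking
 * ixgbe_xmit_cleanup() to reclaim more descriptors as needed.
 * Returns the number of packets freed.
 */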
2359 static int
2360 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2361 {
2362         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2363         uint16_t i, tx_last, tx_id;
2364         uint16_t nb_tx_free_last;
2365         uint16_t nb_tx_to_clean;
2366         uint32_t pkt_cnt;
2367
2368         /* Start free mbuf from the next of tx_tail */
2369         tx_last = txq->tx_tail;
2370         tx_id  = swr_ring[tx_last].next_id;
2371
2372         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2373                 return 0;
2374
2375         nb_tx_to_clean = txq->nb_tx_free;
2376         nb_tx_free_last = txq->nb_tx_free;
2377         if (!free_cnt)
2378                 free_cnt = txq->nb_tx_desc;
2379
2380         /* Loop through swr_ring to count the number of
2381          * freeable mbufs and packets.
2382          */
2383         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2384                 for (i = 0; i < nb_tx_to_clean &&
2385                         pkt_cnt < free_cnt &&
2386                         tx_id != tx_last; i++) {
2387                         if (swr_ring[tx_id].mbuf != NULL) {
2388                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2389                                 swr_ring[tx_id].mbuf = NULL;
2390
2391                                 /*
2392                                  * last segment in the packet,
2393                                  * increment packet count
2394                                  */
2395                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2396                         }
2397
2398                         tx_id = swr_ring[tx_id].next_id;
2399                 }
2400
2401                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2402                         txq->nb_tx_free || tx_id == tx_last)
2403                         break;
2404
2405                 if (pkt_cnt < free_cnt) {
2406                         if (ixgbe_xmit_cleanup(txq))
2407                                 break;
2408
2409                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2410                         nb_tx_free_last = txq->nb_tx_free;
2411                 }
2412         }
2413
2414         return (int)pkt_cnt;
2415 }
2416
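/* Free completed Tx mbufs on the simple Tx path: release buffers in
 * multiples of tx_rs_thresh via ixgbe_tx_free_bufs() until free_cnt
 * descriptors have been covered or no more can be freed.
 * Returns the number of descriptors freed.
 */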
2417 static int
2418 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2419                         uint32_t free_cnt)
2420 {
2421         int i, n, cnt;
2422
2423         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2424                 free_cnt = txq->nb_tx_desc;
2425
2426         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2427
2428         for (i = 0; i < cnt; i += n) {
2429                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2430                         break;
2431
2432                 n = ixgbe_tx_free_bufs(txq);
2433
2434                 if (n == 0)
2435                         break;
2436         }
2437
2438         return i;
2439 }
2440
2441 static int
2442 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2443                         uint32_t free_cnt __rte_unused)
2444 {
2445         return -ENOTSUP;
2446 }
2447
2448 int
2449 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2450 {
2451         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2452         if (txq->offloads == 0 &&
2453 #ifdef RTE_LIB_SECURITY
2454                         !(txq->using_ipsec) &&
2455 #endif
2456                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2457                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2458                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2459                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2460                                         txq->sw_ring_v != NULL)) {
2461                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2462                 } else {
2463                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2464                 }
2465         }
2466
2467         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2468 }
2469
2470 static void __rte_cold
2471 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2472 {
2473         if (txq != NULL &&
2474             txq->sw_ring != NULL)
2475                 rte_free(txq->sw_ring);
2476 }
2477
2478 static void __rte_cold
2479 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2480 {
2481         if (txq != NULL && txq->ops != NULL) {
2482                 txq->ops->release_mbufs(txq);
2483                 txq->ops->free_swring(txq);
2484                 rte_memzone_free(txq->mz);
2485                 rte_free(txq);
2486         }
2487 }
2488
2489 void __rte_cold
2490 ixgbe_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2491 {
2492         ixgbe_tx_queue_release(dev->data->tx_queues[qid]);
2493 }
2494
2495 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2496 static void __rte_cold
2497 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2498 {
2499         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2500         struct ixgbe_tx_entry *txe = txq->sw_ring;
2501         uint16_t prev, i;
2502
2503         /* Zero out HW ring memory */
2504         for (i = 0; i < txq->nb_tx_desc; i++) {
2505                 txq->tx_ring[i] = zeroed_desc;
2506         }
2507
2508         /* Initialize SW ring entries */
2509         prev = (uint16_t) (txq->nb_tx_desc - 1);
2510         for (i = 0; i < txq->nb_tx_desc; i++) {
2511                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2512
2513                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2514                 txe[i].mbuf = NULL;
2515                 txe[i].last_id = i;
2516                 txe[prev].next_id = i;
2517                 prev = i;
2518         }
2519
2520         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2521         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2522
2523         txq->tx_tail = 0;
2524         txq->nb_tx_used = 0;
2525         /*
2526          * Always allow 1 descriptor to be un-allocated to avoid
2527          * a H/W race condition
2528          */
2529         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2530         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2531         txq->ctx_curr = 0;
2532         memset((void *)&txq->ctx_cache, 0,
2533                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2534 }
2535
2536 static const struct ixgbe_txq_ops def_txq_ops = {
2537         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2538         .free_swring = ixgbe_tx_free_swring,
2539         .reset = ixgbe_reset_tx_queue,
2540 };
2541
2542 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2543  * the queue parameters. Used in tx_queue_setup by primary process and then
2544  * in dev_init by secondary process when attaching to an existing ethdev.
2545  */
2546 void __rte_cold
2547 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2548 {
2549         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2550         if ((txq->offloads == 0) &&
2551 #ifdef RTE_LIB_SECURITY
2552                         !(txq->using_ipsec) &&
2553 #endif
2554                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2555                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2556                 dev->tx_pkt_prepare = NULL;
2557                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2558                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2559                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2560                                         ixgbe_txq_vec_setup(txq) == 0)) {
2561                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2562                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2563                 } else
2564                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2565         } else {
2566                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2567                 PMD_INIT_LOG(DEBUG,
2568                                 " - offloads = 0x%" PRIx64,
2569                                 txq->offloads);
2570                 PMD_INIT_LOG(DEBUG,
2571                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2572                                 (unsigned long)txq->tx_rs_thresh,
2573                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2574                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2575                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2576         }
2577 }
2578
2579 uint64_t
2580 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2581 {
2582         RTE_SET_USED(dev);
2583
2584         return 0;
2585 }
2586
2587 uint64_t
2588 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2589 {
2590         uint64_t tx_offload_capa;
2591         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2592
2593         tx_offload_capa =
2594                 RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
2595                 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM  |
2596                 RTE_ETH_TX_OFFLOAD_UDP_CKSUM   |
2597                 RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
2598                 RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
2599                 RTE_ETH_TX_OFFLOAD_TCP_TSO     |
2600                 RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
2601
2602         if (hw->mac.type == ixgbe_mac_82599EB ||
2603             hw->mac.type == ixgbe_mac_X540)
2604                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_MACSEC_INSERT;
2605
2606         if (hw->mac.type == ixgbe_mac_X550 ||
2607             hw->mac.type == ixgbe_mac_X550EM_x ||
2608             hw->mac.type == ixgbe_mac_X550EM_a)
2609                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2610
2611 #ifdef RTE_LIB_SECURITY
2612         if (dev->security_ctx)
2613                 tx_offload_capa |= RTE_ETH_TX_OFFLOAD_SECURITY;
2614 #endif
2615         return tx_offload_capa;
2616 }
2617
2618 int __rte_cold
2619 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2620                          uint16_t queue_idx,
2621                          uint16_t nb_desc,
2622                          unsigned int socket_id,
2623                          const struct rte_eth_txconf *tx_conf)
2624 {
2625         const struct rte_memzone *tz;
2626         struct ixgbe_tx_queue *txq;
2627         struct ixgbe_hw     *hw;
2628         uint16_t tx_rs_thresh, tx_free_thresh;
2629         uint64_t offloads;
2630
2631         PMD_INIT_FUNC_TRACE();
2632         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2633
2634         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2635
2636         /*
2637          * Validate number of transmit descriptors.
2638          * It must not exceed hardware maximum, and must be multiple
2639          * of IXGBE_ALIGN.
2640          */
2641         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2642                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2643                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2644                 return -EINVAL;
2645         }
2646
2647         /*
2648          * The following two parameters control the setting of the RS bit on
2649          * transmit descriptors.
2650          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2651          * descriptors have been used.
2652          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2653          * descriptors are used or if the number of descriptors required
2654          * to transmit a packet is greater than the number of free TX
2655          * descriptors.
2656          * The following constraints must be satisfied:
2657          *  tx_rs_thresh must be greater than 0.
2658          *  tx_rs_thresh must be less than the size of the ring minus 2.
2659          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2660          *  tx_rs_thresh must be a divisor of the ring size.
2661          *  tx_free_thresh must be greater than 0.
2662          *  tx_free_thresh must be less than the size of the ring minus 3.
2663          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2664          * One descriptor in the TX ring is used as a sentinel to avoid a
2665          * H/W race condition, hence the maximum threshold constraints.
2666          * When set to zero use default values.
2667          */
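        /*
         * Worked example (illustrative only): with nb_desc = 512 and both
         * thresholds left at zero, tx_free_thresh and tx_rs_thresh fall back
         * to their compile-time defaults (32 at the time of writing).  All
         * constraints hold: 32 > 0, 32 < 512 - 2, 32 <= 32, 512 % 32 == 0,
         * 32 < 512 - 3 and 32 + 32 <= 512, so the checks below pass.  By
         * contrast, tx_rs_thresh = 96 with nb_desc = 512 would be rejected
         * because 512 % 96 != 0.
         */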
2668         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2669                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2670         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2671         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2672                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2673         if (tx_conf->tx_rs_thresh > 0)
2674                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2675         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2676                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2677                              "exceed nb_desc. (tx_rs_thresh=%u "
2678                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2679                              (unsigned int)tx_rs_thresh,
2680                              (unsigned int)tx_free_thresh,
2681                              (unsigned int)nb_desc,
2682                              (int)dev->data->port_id,
2683                              (int)queue_idx);
2684                 return -(EINVAL);
2685         }
2686         if (tx_rs_thresh >= (nb_desc - 2)) {
2687                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2688                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2689                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2690                         (int)dev->data->port_id, (int)queue_idx);
2691                 return -(EINVAL);
2692         }
2693         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2694                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2695                         "(tx_rs_thresh=%u port=%d queue=%d)",
2696                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2697                         (int)dev->data->port_id, (int)queue_idx);
2698                 return -(EINVAL);
2699         }
2700         if (tx_free_thresh >= (nb_desc - 3)) {
2701                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2702                              "number of TX descriptors minus 3. "
2703                              "(tx_free_thresh=%u "
2704                              "port=%d queue=%d)",
2705                              (unsigned int)tx_free_thresh,
2706                              (int)dev->data->port_id, (int)queue_idx);
2707                 return -(EINVAL);
2708         }
2709         if (tx_rs_thresh > tx_free_thresh) {
2710                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2711                              "tx_free_thresh. (tx_free_thresh=%u "
2712                              "tx_rs_thresh=%u port=%d queue=%d)",
2713                              (unsigned int)tx_free_thresh,
2714                              (unsigned int)tx_rs_thresh,
2715                              (int)dev->data->port_id,
2716                              (int)queue_idx);
2717                 return -(EINVAL);
2718         }
2719         if ((nb_desc % tx_rs_thresh) != 0) {
2720                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2721                              "number of TX descriptors. (tx_rs_thresh=%u "
2722                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2723                              (int)dev->data->port_id, (int)queue_idx);
2724                 return -(EINVAL);
2725         }
2726
2727         /*
2728          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2729          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2730          * by the NIC and all descriptors are written back after the NIC
2731          * accumulates WTHRESH descriptors.
2732          */
2733         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2734                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2735                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2736                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2737                              (int)dev->data->port_id, (int)queue_idx);
2738                 return -(EINVAL);
2739         }
2740
2741         /* Free memory prior to re-allocation if needed... */
2742         if (dev->data->tx_queues[queue_idx] != NULL) {
2743                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2744                 dev->data->tx_queues[queue_idx] = NULL;
2745         }
2746
2747         /* First allocate the tx queue data structure */
2748         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2749                                  RTE_CACHE_LINE_SIZE, socket_id);
2750         if (txq == NULL)
2751                 return -ENOMEM;
2752
2753         /*
2754          * Allocate TX ring hardware descriptors. A memzone large enough to
2755          * handle the maximum ring size is allocated in order to allow for
2756          * resizing in later calls to the queue setup function.
2757          */
2758         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2759                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2760                         IXGBE_ALIGN, socket_id);
2761         if (tz == NULL) {
2762                 ixgbe_tx_queue_release(txq);
2763                 return -ENOMEM;
2764         }
2765
2766         txq->mz = tz;
2767         txq->nb_tx_desc = nb_desc;
2768         txq->tx_rs_thresh = tx_rs_thresh;
2769         txq->tx_free_thresh = tx_free_thresh;
2770         txq->pthresh = tx_conf->tx_thresh.pthresh;
2771         txq->hthresh = tx_conf->tx_thresh.hthresh;
2772         txq->wthresh = tx_conf->tx_thresh.wthresh;
2773         txq->queue_id = queue_idx;
2774         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2775                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2776         txq->port_id = dev->data->port_id;
2777         txq->offloads = offloads;
2778         txq->ops = &def_txq_ops;
2779         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2780 #ifdef RTE_LIB_SECURITY
2781         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2782                         RTE_ETH_TX_OFFLOAD_SECURITY);
2783 #endif
2784
2785         /*
2786          * Use the VF tail register (VFTDT) when running as a virtual function
2787          */
2788         if (hw->mac.type == ixgbe_mac_82599_vf ||
2789             hw->mac.type == ixgbe_mac_X540_vf ||
2790             hw->mac.type == ixgbe_mac_X550_vf ||
2791             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2792             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2793                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2794         else
2795                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2796
2797         txq->tx_ring_phys_addr = tz->iova;
2798         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2799
2800         /* Allocate software ring */
2801         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2802                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2803                                 RTE_CACHE_LINE_SIZE, socket_id);
2804         if (txq->sw_ring == NULL) {
2805                 ixgbe_tx_queue_release(txq);
2806                 return -ENOMEM;
2807         }
2808         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2809                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2810
2811         /* set up vector or scalar TX function as appropriate */
2812         ixgbe_set_tx_function(dev, txq);
2813
2814         txq->ops->reset(txq);
2815
2816         dev->data->tx_queues[queue_idx] = txq;
2817
2818
2819         return 0;
2820 }
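
/*
 * Illustrative application-side setup (not part of the driver): the
 * thresholds validated above come from the rte_eth_txconf passed to the
 * generic queue setup call.  port_id and socket below are placeholders.
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh   = 32,
 *             .tx_free_thresh = 32,
 *             .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
 *     };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512, socket, &txconf);
 *
 * These values satisfy every check above and, since no offloads are
 * requested and tx_rs_thresh is at least RTE_PMD_IXGBE_TX_MAX_BURST,
 * ixgbe_set_tx_function() will pick the simple or vector Tx path.
 */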
2821
2822 /**
2823  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2824  *
2825  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2826  * in the sw_rsc_ring is not set to NULL but rather points to the next
2827  * mbuf of this RSC aggregation (that has not been completed yet and still
2828  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2829  * will just free first "nb_segs" segments of the cluster explicitly by calling
2830  * an rte_pktmbuf_free_seg().
2831  *
2832  * @m scattered cluster head
2833  */
2834 static void __rte_cold
2835 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2836 {
2837         uint16_t i, nb_segs = m->nb_segs;
2838         struct rte_mbuf *next_seg;
2839
2840         for (i = 0; i < nb_segs; i++) {
2841                 next_seg = m->next;
2842                 rte_pktmbuf_free_seg(m);
2843                 m = next_seg;
2844         }
2845 }
2846
2847 static void __rte_cold
2848 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2849 {
2850         unsigned i;
2851
2852         /* SSE Vector driver has a different way of releasing mbufs. */
2853         if (rxq->rx_using_sse) {
2854                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2855                 return;
2856         }
2857
2858         if (rxq->sw_ring != NULL) {
2859                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2860                         if (rxq->sw_ring[i].mbuf != NULL) {
2861                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2862                                 rxq->sw_ring[i].mbuf = NULL;
2863                         }
2864                 }
2865                 if (rxq->rx_nb_avail) {
2866                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2867                                 struct rte_mbuf *mb;
2868
2869                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2870                                 rte_pktmbuf_free_seg(mb);
2871                         }
2872                         rxq->rx_nb_avail = 0;
2873                 }
2874         }
2875
2876         if (rxq->sw_sc_ring)
2877                 for (i = 0; i < rxq->nb_rx_desc; i++)
2878                         if (rxq->sw_sc_ring[i].fbuf) {
2879                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2880                                 rxq->sw_sc_ring[i].fbuf = NULL;
2881                         }
2882 }
2883
2884 static void __rte_cold
2885 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2886 {
2887         if (rxq != NULL) {
2888                 ixgbe_rx_queue_release_mbufs(rxq);
2889                 rte_free(rxq->sw_ring);
2890                 rte_free(rxq->sw_sc_ring);
2891                 rte_memzone_free(rxq->mz);
2892                 rte_free(rxq);
2893         }
2894 }
2895
2896 void __rte_cold
2897 ixgbe_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
2898 {
2899         ixgbe_rx_queue_release(dev->data->rx_queues[qid]);
2900 }
2901
2902 /*
2903  * Check if Rx Burst Bulk Alloc function can be used.
2904  * Return
2905  *        0: the preconditions are satisfied and the bulk allocation function
2906  *           can be used.
2907  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2908  *           function must be used.
2909  */
2910 static inline int __rte_cold
2911 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2912 {
2913         int ret = 0;
2914
2915         /*
2916          * Make sure the following pre-conditions are satisfied:
2917          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2918          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2919          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2920          * Scattered packets are not supported.  This should be checked
2921          * outside of this function.
2922          */
2923         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2924                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2925                              "rxq->rx_free_thresh=%d, "
2926                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2927                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2928                 ret = -EINVAL;
2929         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2930                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2931                              "rxq->rx_free_thresh=%d, "
2932                              "rxq->nb_rx_desc=%d",
2933                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2934                 ret = -EINVAL;
2935         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2936                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2937                              "rxq->nb_rx_desc=%d, "
2938                              "rxq->rx_free_thresh=%d",
2939                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2940                 ret = -EINVAL;
2941         }
2942
2943         return ret;
2944 }
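
/*
 * Worked example (illustrative only): nb_rx_desc = 512 with
 * rx_free_thresh = 32 satisfies all three preconditions above, assuming
 * the default RTE_PMD_IXGBE_RX_MAX_BURST of 32: 32 >= 32, 32 < 512 and
 * 512 % 32 == 0.  A value such as rx_free_thresh = 24 would fail the
 * first check and disable bulk allocation for the whole port.
 */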
2945
2946 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2947 static void __rte_cold
2948 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2949 {
2950         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2951         unsigned i;
2952         uint16_t len = rxq->nb_rx_desc;
2953
2954         /*
2955          * By default, the Rx queue setup function allocates enough memory for
2956          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2957          * extra memory at the end of the descriptor ring to be zeroed out.
2958          */
2959         if (adapter->rx_bulk_alloc_allowed)
2960                 /* zero out extra memory */
2961                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2962
2963         /*
2964          * Zero out HW ring memory. Zero out extra memory at the end of
2965          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2966          * reads extra memory as zeros.
2967          */
2968         for (i = 0; i < len; i++) {
2969                 rxq->rx_ring[i] = zeroed_desc;
2970         }
2971
2972         /*
2973          * initialize extra software ring entries. Space for these extra
2974          * entries is always allocated
2975          */
2976         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2977         for (i = rxq->nb_rx_desc; i < len; ++i) {
2978                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2979         }
2980
2981         rxq->rx_nb_avail = 0;
2982         rxq->rx_next_avail = 0;
2983         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2984         rxq->rx_tail = 0;
2985         rxq->nb_rx_hold = 0;
2986
2987         if (rxq->pkt_first_seg != NULL)
2988                 rte_pktmbuf_free(rxq->pkt_first_seg);
2989
2990         rxq->pkt_first_seg = NULL;
2991         rxq->pkt_last_seg = NULL;
2992
2993 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2994         rxq->rxrearm_start = 0;
2995         rxq->rxrearm_nb = 0;
2996 #endif
2997 }
2998
2999 static int
3000 ixgbe_is_vf(struct rte_eth_dev *dev)
3001 {
3002         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3003
3004         switch (hw->mac.type) {
3005         case ixgbe_mac_82599_vf:
3006         case ixgbe_mac_X540_vf:
3007         case ixgbe_mac_X550_vf:
3008         case ixgbe_mac_X550EM_x_vf:
3009         case ixgbe_mac_X550EM_a_vf:
3010                 return 1;
3011         default:
3012                 return 0;
3013         }
3014 }
3015
3016 uint64_t
3017 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3018 {
3019         uint64_t offloads = 0;
3020         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3021
3022         if (hw->mac.type != ixgbe_mac_82598EB)
3023                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3024
3025         return offloads;
3026 }
3027
3028 uint64_t
3029 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3030 {
3031         uint64_t offloads;
3032         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3033
3034         offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM  |
3035                    RTE_ETH_RX_OFFLOAD_UDP_CKSUM   |
3036                    RTE_ETH_RX_OFFLOAD_TCP_CKSUM   |
3037                    RTE_ETH_RX_OFFLOAD_KEEP_CRC    |
3038                    RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
3039                    RTE_ETH_RX_OFFLOAD_SCATTER |
3040                    RTE_ETH_RX_OFFLOAD_RSS_HASH;
3041
3042         if (hw->mac.type == ixgbe_mac_82598EB)
3043                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
3044
3045         if (ixgbe_is_vf(dev) == 0)
3046                 offloads |= RTE_ETH_RX_OFFLOAD_VLAN_EXTEND;
3047
3048         /*
3049          * RSC (TCP LRO) is only supported by 82599, X540 and X550 PF devices
3050          * in a non-SR-IOV mode.
3051          */
3052         if ((hw->mac.type == ixgbe_mac_82599EB ||
3053              hw->mac.type == ixgbe_mac_X540 ||
3054              hw->mac.type == ixgbe_mac_X550) &&
3055             !RTE_ETH_DEV_SRIOV(dev).active)
3056                 offloads |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
3057
3058         if (hw->mac.type == ixgbe_mac_82599EB ||
3059             hw->mac.type == ixgbe_mac_X540)
3060                 offloads |= RTE_ETH_RX_OFFLOAD_MACSEC_STRIP;
3061
3062         if (hw->mac.type == ixgbe_mac_X550 ||
3063             hw->mac.type == ixgbe_mac_X550EM_x ||
3064             hw->mac.type == ixgbe_mac_X550EM_a)
3065                 offloads |= RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3066
3067 #ifdef RTE_LIB_SECURITY
3068         if (dev->security_ctx)
3069                 offloads |= RTE_ETH_RX_OFFLOAD_SECURITY;
3070 #endif
3071
3072         return offloads;
3073 }
3074
3075 int __rte_cold
3076 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3077                          uint16_t queue_idx,
3078                          uint16_t nb_desc,
3079                          unsigned int socket_id,
3080                          const struct rte_eth_rxconf *rx_conf,
3081                          struct rte_mempool *mp)
3082 {
3083         const struct rte_memzone *rz;
3084         struct ixgbe_rx_queue *rxq;
3085         struct ixgbe_hw     *hw;
3086         uint16_t len;
3087         struct ixgbe_adapter *adapter = dev->data->dev_private;
3088         uint64_t offloads;
3089
3090         PMD_INIT_FUNC_TRACE();
3091         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3092
3093         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3094
3095         /*
3096          * Validate number of receive descriptors.
3097          * It must not exceed hardware maximum, and must be multiple
3098          * of IXGBE_ALIGN.
3099          */
3100         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3101                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3102                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3103                 return -EINVAL;
3104         }
3105
3106         /* Free memory prior to re-allocation if needed... */
3107         if (dev->data->rx_queues[queue_idx] != NULL) {
3108                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3109                 dev->data->rx_queues[queue_idx] = NULL;
3110         }
3111
3112         /* First allocate the rx queue data structure */
3113         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3114                                  RTE_CACHE_LINE_SIZE, socket_id);
3115         if (rxq == NULL)
3116                 return -ENOMEM;
3117         rxq->mb_pool = mp;
3118         rxq->nb_rx_desc = nb_desc;
3119         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3120         rxq->queue_id = queue_idx;
3121         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3122                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3123         rxq->port_id = dev->data->port_id;
3124         if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
3125                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3126         else
3127                 rxq->crc_len = 0;
3128         rxq->drop_en = rx_conf->rx_drop_en;
3129         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3130         rxq->offloads = offloads;
3131
3132         /*
3133          * The packet type in RX descriptor is different for different NICs.
3134          * Some bits are used for X550 but reserved for other NICs.
3135          * So set different masks for different NICs.
3136          */
3137         if (hw->mac.type == ixgbe_mac_X550 ||
3138             hw->mac.type == ixgbe_mac_X550EM_x ||
3139             hw->mac.type == ixgbe_mac_X550EM_a ||
3140             hw->mac.type == ixgbe_mac_X550_vf ||
3141             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3142             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3143                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3144         else
3145                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3146
3147         /*
3148          * 82599 errata, UDP frames with a 0 checksum can be marked as checksum
3149          * errors.
3150          */
3151         if (hw->mac.type == ixgbe_mac_82599EB)
3152                 rxq->rx_udp_csum_zero_err = 1;
3153
3154         /*
3155          * Allocate RX ring hardware descriptors. A memzone large enough to
3156          * handle the maximum ring size is allocated in order to allow for
3157          * resizing in later calls to the queue setup function.
3158          */
3159         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3160                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3161         if (rz == NULL) {
3162                 ixgbe_rx_queue_release(rxq);
3163                 return -ENOMEM;
3164         }
3165
3166         rxq->mz = rz;
3167         /*
3168          * Zero init all the descriptors in the ring.
3169          */
3170         memset(rz->addr, 0, RX_RING_SZ);
3171
3172         /*
3173          * Use the VF receive registers (VFRDT/VFRDH) when running as a virtual function
3174          */
3175         if (hw->mac.type == ixgbe_mac_82599_vf ||
3176             hw->mac.type == ixgbe_mac_X540_vf ||
3177             hw->mac.type == ixgbe_mac_X550_vf ||
3178             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3179             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3180                 rxq->rdt_reg_addr =
3181                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3182                 rxq->rdh_reg_addr =
3183                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3184         } else {
3185                 rxq->rdt_reg_addr =
3186                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3187                 rxq->rdh_reg_addr =
3188                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3189         }
3190
3191         rxq->rx_ring_phys_addr = rz->iova;
3192         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3193
3194         /*
3195          * Certain constraints must be met in order to use the bulk buffer
3196          * allocation Rx burst function. If any of the Rx queues does not meet them,
3197          * the feature should be disabled for the whole port.
3198          */
3199         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3200                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3201                                     "preconditions - canceling the feature for "
3202                                     "the whole port[%d]",
3203                              rxq->queue_id, rxq->port_id);
3204                 adapter->rx_bulk_alloc_allowed = false;
3205         }
3206
3207         /*
3208          * Allocate software ring. Allow for space at the end of the
3209          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3210          * function does not access an invalid memory region.
3211          */
3212         len = nb_desc;
3213         if (adapter->rx_bulk_alloc_allowed)
3214                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3215
3216         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3217                                           sizeof(struct ixgbe_rx_entry) * len,
3218                                           RTE_CACHE_LINE_SIZE, socket_id);
3219         if (!rxq->sw_ring) {
3220                 ixgbe_rx_queue_release(rxq);
3221                 return -ENOMEM;
3222         }
3223
3224         /*
3225          * Always allocate even if it's not going to be needed in order to
3226          * simplify the code.
3227          *
3228          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3229          * be requested in ixgbe_dev_rx_init(), which is called later from
3230          * dev_start() flow.
3231          */
3232         rxq->sw_sc_ring =
3233                 rte_zmalloc_socket("rxq->sw_sc_ring",
3234                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3235                                    RTE_CACHE_LINE_SIZE, socket_id);
3236         if (!rxq->sw_sc_ring) {
3237                 ixgbe_rx_queue_release(rxq);
3238                 return -ENOMEM;
3239         }
3240
3241         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3242                             "dma_addr=0x%"PRIx64,
3243                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3244                      rxq->rx_ring_phys_addr);
3245
3246         if (!rte_is_power_of_2(nb_desc)) {
3247                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3248                                     "preconditions - canceling the feature for "
3249                                     "the whole port[%d]",
3250                              rxq->queue_id, rxq->port_id);
3251                 adapter->rx_vec_allowed = false;
3252         } else
3253                 ixgbe_rxq_vec_setup(rxq);
3254
3255         dev->data->rx_queues[queue_idx] = rxq;
3256
3257         ixgbe_reset_rx_queue(adapter, rxq);
3258
3259         return 0;
3260 }
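
/*
 * Illustrative application-side setup (not part of the driver): the queue
 * created above is normally configured through the generic ethdev call.
 * port_id, socket and mbuf_pool below are placeholders.
 *
 *     struct rte_eth_rxconf rxconf = {
 *             .rx_free_thresh = 32,
 *             .offloads = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM,
 *     };
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512, socket,
 *                                      &rxconf, mbuf_pool);
 *
 * A power-of-two ring of 512 descriptors with rx_free_thresh = 32 keeps
 * both the bulk-alloc and the vector Rx preconditions satisfied.
 */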
3261
3262 uint32_t
3263 ixgbe_dev_rx_queue_count(void *rx_queue)
3264 {
3265 #define IXGBE_RXQ_SCAN_INTERVAL 4
3266         volatile union ixgbe_adv_rx_desc *rxdp;
3267         struct ixgbe_rx_queue *rxq;
3268         uint32_t desc = 0;
3269
3270         rxq = rx_queue;
3271         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3272
3273         while ((desc < rxq->nb_rx_desc) &&
3274                 (rxdp->wb.upper.status_error &
3275                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3276                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3277                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3278                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3279                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3280                                 desc - rxq->nb_rx_desc]);
3281         }
3282
3283         return desc;
3284 }
3285
3286 int
3287 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3288 {
3289         struct ixgbe_rx_queue *rxq = rx_queue;
3290         volatile uint32_t *status;
3291         uint32_t nb_hold, desc;
3292
3293         if (unlikely(offset >= rxq->nb_rx_desc))
3294                 return -EINVAL;
3295
3296 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3297         if (rxq->rx_using_sse)
3298                 nb_hold = rxq->rxrearm_nb;
3299         else
3300 #endif
3301                 nb_hold = rxq->nb_rx_hold;
3302         if (offset >= rxq->nb_rx_desc - nb_hold)
3303                 return RTE_ETH_RX_DESC_UNAVAIL;
3304
3305         desc = rxq->rx_tail + offset;
3306         if (desc >= rxq->nb_rx_desc)
3307                 desc -= rxq->nb_rx_desc;
3308
3309         status = &rxq->rx_ring[desc].wb.upper.status_error;
3310         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3311                 return RTE_ETH_RX_DESC_DONE;
3312
3313         return RTE_ETH_RX_DESC_AVAIL;
3314 }
3315
3316 int
3317 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3318 {
3319         struct ixgbe_tx_queue *txq = tx_queue;
3320         volatile uint32_t *status;
3321         uint32_t desc;
3322
3323         if (unlikely(offset >= txq->nb_tx_desc))
3324                 return -EINVAL;
3325
3326         desc = txq->tx_tail + offset;
3327         /* go to next desc that has the RS bit */
3328         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3329                 txq->tx_rs_thresh;
3330         if (desc >= txq->nb_tx_desc) {
3331                 desc -= txq->nb_tx_desc;
3332                 if (desc >= txq->nb_tx_desc)
3333                         desc -= txq->nb_tx_desc;
3334         }
3335
3336         status = &txq->tx_ring[desc].wb.status;
3337         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3338                 return RTE_ETH_TX_DESC_DONE;
3339
3340         return RTE_ETH_TX_DESC_FULL;
3341 }
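
/*
 * Illustrative usage (application side): both descriptor status helpers
 * above are reached through the generic ethdev wrappers; port_id and
 * queue_id below are placeholders.
 *
 *     if (rte_eth_tx_descriptor_status(port_id, queue_id, 64) ==
 *                     RTE_ETH_TX_DESC_DONE)
 *             ...
 *
 * RTE_ETH_TX_DESC_DONE here means the descriptor 64 entries past the
 * current tail has already been written back by the hardware; note that
 * the Tx variant rounds the offset up to the next RS-bit boundary
 * (a multiple of tx_rs_thresh), as computed above.
 */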
3342
3343 /*
3344  * Set up link loopback for X540/X550 mode Tx->Rx.
3345  */
3346 static inline void __rte_cold
3347 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3348 {
3349         uint32_t macc;
3350         PMD_INIT_FUNC_TRACE();
3351
3352         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3353
3354         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3355                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3356         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3357
3358         if (enable) {
3359                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3360                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3361                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3362                 macc |= IXGBE_MACC_FLU;
3363         } else {
3364                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3365                 macc &= ~IXGBE_MACC_FLU;
3366         }
3367
3368         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3369                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3370
3371         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3372 }
3373
3374 void __rte_cold
3375 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3376 {
3377         unsigned i;
3378         struct ixgbe_adapter *adapter = dev->data->dev_private;
3379         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3380
3381         PMD_INIT_FUNC_TRACE();
3382
3383         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3384                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3385
3386                 if (txq != NULL) {
3387                         txq->ops->release_mbufs(txq);
3388                         txq->ops->reset(txq);
3389                 }
3390         }
3391
3392         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3393                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3394
3395                 if (rxq != NULL) {
3396                         ixgbe_rx_queue_release_mbufs(rxq);
3397                         ixgbe_reset_rx_queue(adapter, rxq);
3398                 }
3399         }
3400         /* If loopback mode was enabled, reconfigure the link accordingly */
3401         if (dev->data->dev_conf.lpbk_mode != 0) {
3402                 if (hw->mac.type == ixgbe_mac_X540 ||
3403                      hw->mac.type == ixgbe_mac_X550 ||
3404                      hw->mac.type == ixgbe_mac_X550EM_x ||
3405                      hw->mac.type == ixgbe_mac_X550EM_a)
3406                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3407         }
3408 }
3409
3410 void
3411 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3412 {
3413         unsigned i;
3414
3415         PMD_INIT_FUNC_TRACE();
3416
3417         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3418                 ixgbe_dev_rx_queue_release(dev, i);
3419                 dev->data->rx_queues[i] = NULL;
3420         }
3421         dev->data->nb_rx_queues = 0;
3422
3423         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3424                 ixgbe_dev_tx_queue_release(dev, i);
3425                 dev->data->tx_queues[i] = NULL;
3426         }
3427         dev->data->nb_tx_queues = 0;
3428 }
3429
3430 /*********************************************************************
3431  *
3432  *  Device RX/TX init functions
3433  *
3434  **********************************************************************/
3435
3436 /**
3437  * Receive Side Scaling (RSS)
3438  * See section 7.1.2.8 in the following document:
3439  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3440  *
3441  * Principles:
3442  * The source and destination IP addresses of the IP header and the source
3443  * and destination ports of TCP/UDP headers, if any, of received packets are
3444  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3445  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3446  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3447  * RSS output index which is used as the RX queue index where to store the
3448  * received packets.
3449  * The following output is supplied in the RX write-back descriptor:
3450  *     - 32-bit result of the Microsoft RSS hash function,
3451  *     - 4-bit RSS type field.
3452  */
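
/*
 * Illustrative mapping, following the description above: for a received
 * packet whose 32-bit RSS hash is 0x1a2b3c4d, the RETA index is the low
 * 7 bits, 0x4d = 77, so the packet is steered to whatever Rx queue is
 * programmed in RETA entry 77.  The full 32-bit hash is also reported to
 * software in the mbuf hash.rss field.
 */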
3453
3454 /*
3455  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3456  * Used as the default key.
3457  */
3458 static uint8_t rss_intel_key[40] = {
3459         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3460         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3461         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3462         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3463         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3464 };
3465
3466 static void
3467 ixgbe_rss_disable(struct rte_eth_dev *dev)
3468 {
3469         struct ixgbe_hw *hw;
3470         uint32_t mrqc;
3471         uint32_t mrqc_reg;
3472
3473         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3474         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3475         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3476         mrqc &= ~IXGBE_MRQC_RSSEN;
3477         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3478 }
3479
3480 static void
3481 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3482 {
3483         uint8_t  *hash_key;
3484         uint32_t mrqc;
3485         uint32_t rss_key;
3486         uint64_t rss_hf;
3487         uint16_t i;
3488         uint32_t mrqc_reg;
3489         uint32_t rssrk_reg;
3490
3491         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3492         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3493
3494         hash_key = rss_conf->rss_key;
3495         if (hash_key != NULL) {
3496                 /* Fill in RSS hash key */
3497                 for (i = 0; i < 10; i++) {
3498                         rss_key  = hash_key[(i * 4)];
3499                         rss_key |= hash_key[(i * 4) + 1] << 8;
3500                         rss_key |= hash_key[(i * 4) + 2] << 16;
3501                         rss_key |= hash_key[(i * 4) + 3] << 24;
3502                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3503                 }
3504         }
3505
3506         /* Set configured hashing protocols in MRQC register */
3507         rss_hf = rss_conf->rss_hf;
3508         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3509         if (rss_hf & RTE_ETH_RSS_IPV4)
3510                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3511         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
3512                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3513         if (rss_hf & RTE_ETH_RSS_IPV6)
3514                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3515         if (rss_hf & RTE_ETH_RSS_IPV6_EX)
3516                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3517         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
3518                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3519         if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
3520                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3521         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
3522                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3523         if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
3524                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3525         if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
3526                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3527         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3528 }
3529
3530 int
3531 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3532                           struct rte_eth_rss_conf *rss_conf)
3533 {
3534         struct ixgbe_hw *hw;
3535         uint32_t mrqc;
3536         uint64_t rss_hf;
3537         uint32_t mrqc_reg;
3538
3539         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3540
3541         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3542                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3543                         "NIC.");
3544                 return -ENOTSUP;
3545         }
3546         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3547
3548         /*
3549          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3550          *     "RSS enabling cannot be done dynamically while it must be
3551          *      preceded by a software reset"
3552          * Before changing anything, first check that the update RSS operation
3553          * does not attempt to disable RSS, if RSS was enabled at
3554          * initialization time, or does not attempt to enable RSS, if RSS was
3555          * disabled at initialization time.
3556          */
3557         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3558         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3559         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3560                 if (rss_hf != 0) /* Enable RSS */
3561                         return -(EINVAL);
3562                 return 0; /* Nothing to do */
3563         }
3564         /* RSS enabled */
3565         if (rss_hf == 0) /* Disable RSS */
3566                 return -(EINVAL);
3567         ixgbe_hw_rss_hash_set(hw, rss_conf);
3568         return 0;
3569 }
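
/*
 * Illustrative usage (application side): the hash functions can be updated
 * at runtime through the generic API, which lands in
 * ixgbe_dev_rss_hash_update() above.  port_id below is a placeholder.
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,
 *             .rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * A NULL rss_key keeps the currently programmed key; as enforced above,
 * the call fails with -EINVAL if it would toggle RSS on or off relative
 * to the state chosen at initialization time.
 */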
3570
3571 int
3572 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3573                             struct rte_eth_rss_conf *rss_conf)
3574 {
3575         struct ixgbe_hw *hw;
3576         uint8_t *hash_key;
3577         uint32_t mrqc;
3578         uint32_t rss_key;
3579         uint64_t rss_hf;
3580         uint16_t i;
3581         uint32_t mrqc_reg;
3582         uint32_t rssrk_reg;
3583
3584         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3585         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3586         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3587         hash_key = rss_conf->rss_key;
3588         if (hash_key != NULL) {
3589                 /* Return RSS hash key */
3590                 for (i = 0; i < 10; i++) {
3591                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3592                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3593                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3594                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3595                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3596                 }
3597         }
3598
3599         /* Get RSS functions configured in MRQC register */
3600         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3601         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3602                 rss_conf->rss_hf = 0;
3603                 return 0;
3604         }
3605         rss_hf = 0;
3606         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3607                 rss_hf |= RTE_ETH_RSS_IPV4;
3608         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3609                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
3610         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3611                 rss_hf |= RTE_ETH_RSS_IPV6;
3612         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3613                 rss_hf |= RTE_ETH_RSS_IPV6_EX;
3614         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3615                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
3616         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3617                 rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX;
3618         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3619                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
3620         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3621                 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
3622         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3623                 rss_hf |= RTE_ETH_RSS_IPV6_UDP_EX;
3624         rss_conf->rss_hf = rss_hf;
3625         return 0;
3626 }
3627
3628 static void
3629 ixgbe_rss_configure(struct rte_eth_dev *dev)
3630 {
3631         struct rte_eth_rss_conf rss_conf;
3632         struct ixgbe_adapter *adapter;
3633         struct ixgbe_hw *hw;
3634         uint32_t reta;
3635         uint16_t i;
3636         uint16_t j;
3637         uint16_t sp_reta_size;
3638         uint32_t reta_reg;
3639
3640         PMD_INIT_FUNC_TRACE();
3641         adapter = dev->data->dev_private;
3642         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3643
3644         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3645
3646         /*
3647          * Fill in redirection table
3648          * The byte-swap is needed because NIC registers are in
3649          * little-endian order.
3650          */
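        /*
         * Example of the fill pattern below (illustrative): with four Rx
         * queues the table entries cycle 0, 1, 2, 3, 0, 1, ... and every
         * fourth iteration writes one 32-bit RETA register holding four
         * byte-wide entries, byte-swapped to match the register layout.
         */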
3651         if (adapter->rss_reta_updated == 0) {
3652                 reta = 0;
3653                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3654                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3655
3656                         if (j == dev->data->nb_rx_queues)
3657                                 j = 0;
3658                         reta = (reta << 8) | j;
3659                         if ((i & 3) == 3)
3660                                 IXGBE_WRITE_REG(hw, reta_reg,
3661                                                 rte_bswap32(reta));
3662                 }
3663         }
3664
3665         /*
3666          * Configure the RSS key and the RSS protocols used to compute
3667          * the RSS hash of input packets.
3668          */
3669         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3670         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3671                 ixgbe_rss_disable(dev);
3672                 return;
3673         }
3674         if (rss_conf.rss_key == NULL)
3675                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3676         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3677 }
3678
3679 #define NUM_VFTA_REGISTERS 128
3680 #define NIC_RX_BUFFER_SIZE 0x200
3681 #define X550_RX_BUFFER_SIZE 0x180
3682
3683 static void
3684 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3685 {
3686         struct rte_eth_vmdq_dcb_conf *cfg;
3687         struct ixgbe_hw *hw;
3688         enum rte_eth_nb_pools num_pools;
3689         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3690         uint16_t pbsize;
3691         uint8_t nb_tcs; /* number of traffic classes */
3692         int i;
3693
3694         PMD_INIT_FUNC_TRACE();
3695         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3696         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3697         num_pools = cfg->nb_queue_pools;
3698         /* Check we have a valid number of pools */
3699         if (num_pools != RTE_ETH_16_POOLS && num_pools != RTE_ETH_32_POOLS) {
3700                 ixgbe_rss_disable(dev);
3701                 return;
3702         }
3703         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3704         nb_tcs = (uint8_t)(RTE_ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3705
3706         /*
3707          * RXPBSIZE
3708          * split rx buffer up into sections, each for 1 traffic class
3709          */
3710         switch (hw->mac.type) {
3711         case ixgbe_mac_X550:
3712         case ixgbe_mac_X550EM_x:
3713         case ixgbe_mac_X550EM_a:
3714                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3715                 break;
3716         default:
3717                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3718                 break;
3719         }
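        /*
         * Worked example (illustrative): with 16 pools (8 TCs) an X550-class
         * MAC splits its buffer as 0x180 / 8 = 0x30 per TC, while other MACs
         * get 0x200 / 8 = 0x40 per TC; with 32 pools (4 TCs) each TC receives
         * twice that amount.  Unused TCs are then assigned a zero-sized
         * buffer by the second loop below.
         */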
3720         for (i = 0; i < nb_tcs; i++) {
3721                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3722
3723                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3724                 /* clear 10 bits. */
3725                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3726                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3727         }
3728         /* zero alloc all unused TCs */
3729         for (i = nb_tcs; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3730                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3731
3732                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3733                 /* clear 10 bits. */
3734                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3735         }
3736
3737         /* MRQC: enable vmdq and dcb */
3738         mrqc = (num_pools == RTE_ETH_16_POOLS) ?
3739                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3740         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3741
3742         /* PFVTCTL: turn on virtualisation and set the default pool */
3743         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3744         if (cfg->enable_default_pool) {
3745                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3746         } else {
3747                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3748         }
3749
3750         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3751
3752         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3753         queue_mapping = 0;
3754         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
3755                 /*
3756                  * mapping is done with 3 bits per priority,
3757                  * so shift by i*3 each time
3758                  */
3759                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3760
3761         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3762
3763         /* RTRPCS: DCB related */
3764         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3765
3766         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3767         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3768         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3769         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3770
3771         /* VFTA - enable all vlan filters */
3772         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3773                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3774         }
3775
3776         /* VFRE: pool enabling for receive - 16 or 32 */
3777         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3778                         num_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3779
3780         /*
3781          * MPSAR - allow pools to read specific mac addresses
3782          * In this case, all pools should be able to read from mac addr 0
3783          */
3784         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3785         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3786
3787         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3788         for (i = 0; i < cfg->nb_pool_maps; i++) {
3789                 /* set vlan id in VF register and set the valid bit */
3790                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3791                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3792                 /*
3793                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3794                  * pools, we only need to use the first half of the register
3795                  * i.e. bits 0-31
3796                  */
3797                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3798         }
3799 }
3800
3801 /**
3802  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3803  * @dev: pointer to eth_dev structure
3804  * @dcb_config: pointer to ixgbe_dcb_config structure
3805  */
3806 static void
3807 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3808                        struct ixgbe_dcb_config *dcb_config)
3809 {
3810         uint32_t reg;
3811         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3812
3813         PMD_INIT_FUNC_TRACE();
3814         if (hw->mac.type != ixgbe_mac_82598EB) {
3815                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3816                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3817                 reg |= IXGBE_RTTDCS_ARBDIS;
3818                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3819
3820                 /* Enable DCB for Tx with 8 TCs */
3821                 if (dcb_config->num_tcs.pg_tcs == 8) {
3822                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3823                 } else {
3824                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3825                 }
3826                 if (dcb_config->vt_mode)
3827                         reg |= IXGBE_MTQC_VT_ENA;
3828                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3829
3830                 /* Enable the Tx desc arbiter */
3831                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3832                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3833                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3834
3835                 /* Enable Security TX Buffer IFG for DCB */
3836                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3837                 reg |= IXGBE_SECTX_DCB;
3838                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3839         }
3840 }
3841
3842 /**
3843  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3844  * @dev: pointer to rte_eth_dev structure
3845  * @dcb_config: pointer to ixgbe_dcb_config structure
3846  */
3847 static void
3848 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3849                         struct ixgbe_dcb_config *dcb_config)
3850 {
3851         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3852                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3853         struct ixgbe_hw *hw =
3854                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3855
3856         PMD_INIT_FUNC_TRACE();
3857         if (hw->mac.type != ixgbe_mac_82598EB)
3858                 /*PF VF Transmit Enable*/
3859                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3860                         vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3861
3862         /*Configure general DCB TX parameters*/
3863         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3864 }
3865
3866 static void
3867 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3868                         struct ixgbe_dcb_config *dcb_config)
3869 {
3870         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3871                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3872         struct ixgbe_dcb_tc_config *tc;
3873         uint8_t i, j;
3874
3875         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3876         if (vmdq_rx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3877                 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3878                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3879         } else {
3880                 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3881                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3882         }
3883
3884         /* Initialize User Priority to Traffic Class mapping */
3885         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3886                 tc = &dcb_config->tc_config[j];
3887                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3888         }
3889
3890         /* User Priority to Traffic Class mapping */
3891         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3892                 j = vmdq_rx_conf->dcb_tc[i];
3893                 tc = &dcb_config->tc_config[j];
3894                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3895                                                 (uint8_t)(1 << i);
3896         }
3897 }
3898
3899 static void
3900 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3901                         struct ixgbe_dcb_config *dcb_config)
3902 {
3903         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3904                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3905         struct ixgbe_dcb_tc_config *tc;
3906         uint8_t i, j;
3907
3908         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3909         if (vmdq_tx_conf->nb_queue_pools == RTE_ETH_16_POOLS) {
3910                 dcb_config->num_tcs.pg_tcs = RTE_ETH_8_TCS;
3911                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_8_TCS;
3912         } else {
3913                 dcb_config->num_tcs.pg_tcs = RTE_ETH_4_TCS;
3914                 dcb_config->num_tcs.pfc_tcs = RTE_ETH_4_TCS;
3915         }
3916
3917         /* Initialize User Priority to Traffic Class mapping */
3918         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3919                 tc = &dcb_config->tc_config[j];
3920                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3921         }
3922
3923         /* User Priority to Traffic Class mapping */
3924         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3925                 j = vmdq_tx_conf->dcb_tc[i];
3926                 tc = &dcb_config->tc_config[j];
3927                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3928                                                 (uint8_t)(1 << i);
3929         }
3930 }
3931
3932 static void
3933 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3934                 struct ixgbe_dcb_config *dcb_config)
3935 {
3936         struct rte_eth_dcb_rx_conf *rx_conf =
3937                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3938         struct ixgbe_dcb_tc_config *tc;
3939         uint8_t i, j;
3940
3941         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3942         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3943
3944         /* Initialize User Priority to Traffic Class mapping */
3945         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3946                 tc = &dcb_config->tc_config[j];
3947                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3948         }
3949
3950         /* User Priority to Traffic Class mapping */
3951         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3952                 j = rx_conf->dcb_tc[i];
3953                 tc = &dcb_config->tc_config[j];
3954                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3955                                                 (uint8_t)(1 << i);
3956         }
3957 }
3958
3959 static void
3960 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3961                 struct ixgbe_dcb_config *dcb_config)
3962 {
3963         struct rte_eth_dcb_tx_conf *tx_conf =
3964                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3965         struct ixgbe_dcb_tc_config *tc;
3966         uint8_t i, j;
3967
3968         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3969         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3970
3971         /* Initialize User Priority to Traffic Class mapping */
3972         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3973                 tc = &dcb_config->tc_config[j];
3974                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3975         }
3976
3977         /* User Priority to Traffic Class mapping */
3978         for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
3979                 j = tx_conf->dcb_tc[i];
3980                 tc = &dcb_config->tc_config[j];
3981                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3982                                                 (uint8_t)(1 << i);
3983         }
3984 }
3985
3986 /**
3987  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3988  * @dev: pointer to eth_dev structure
3989  * @dcb_config: pointer to ixgbe_dcb_config structure
3990  */
3991 static void
3992 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3993                        struct ixgbe_dcb_config *dcb_config)
3994 {
3995         uint32_t reg;
3996         uint32_t vlanctrl;
3997         uint8_t i;
3998         uint32_t q;
3999         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4000
4001         PMD_INIT_FUNC_TRACE();
4002         /*
4003          * Disable the arbiter before changing parameters
4004          * (always enable recycle mode; WSP)
4005          */
4006         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4007         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4008
4009         if (hw->mac.type != ixgbe_mac_82598EB) {
4010                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
4011                 if (dcb_config->num_tcs.pg_tcs == 4) {
4012                         if (dcb_config->vt_mode)
4013                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4014                                         IXGBE_MRQC_VMDQRT4TCEN;
4015                         else {
4016                                 /* whether the mode is DCB or DCB_RSS, just set
4017                                  * MRQE to the RTRSSxTCEN value; RSS itself is
4018                                  * controlled by the RSS_FIELD bits
4019                                  */
4020                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4021                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4022                                         IXGBE_MRQC_RTRSS4TCEN;
4023                         }
4024                 }
4025                 if (dcb_config->num_tcs.pg_tcs == 8) {
4026                         if (dcb_config->vt_mode)
4027                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4028                                         IXGBE_MRQC_VMDQRT8TCEN;
4029                         else {
4030                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4031                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4032                                         IXGBE_MRQC_RTRSS8TCEN;
4033                         }
4034                 }
4035
4036                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4037
4038                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4039                         /* Disable drop for all queues in VMDQ mode*/
4040                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4041                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4042                                                 (IXGBE_QDE_WRITE |
4043                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4044                 } else {
4045                         /* Enable drop for all queues in SRIOV mode */
4046                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4047                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4048                                                 (IXGBE_QDE_WRITE |
4049                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4050                                                  IXGBE_QDE_ENABLE));
4051                 }
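                /*
                 * QDE is an indirect register: each write carries the
                 * queue index plus IXGBE_QDE_WRITE. With SR-IOV active,
                 * per-queue drop is enabled so a stalled VF queue cannot
                 * back-pressure the shared packet buffer (as understood
                 * from the datasheet).
                 */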
4052         }
4053
4054         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4055         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4056         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4057         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4058
4059         /* VFTA - enable all vlan filters */
4060         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4061                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4062         }
4063
4064         /*
4065          * Configure Rx packet plane (recycle mode; WSP) and
4066          * enable arbiter
4067          */
4068         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4069         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4070 }
4071
4072 static void
4073 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4074                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4075 {
4076         switch (hw->mac.type) {
4077         case ixgbe_mac_82598EB:
4078                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4079                 break;
4080         case ixgbe_mac_82599EB:
4081         case ixgbe_mac_X540:
4082         case ixgbe_mac_X550:
4083         case ixgbe_mac_X550EM_x:
4084         case ixgbe_mac_X550EM_a:
4085                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4086                                                   tsa, map);
4087                 break;
4088         default:
4089                 break;
4090         }
4091 }
4092
4093 static void
4094 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4095                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4096 {
4097         switch (hw->mac.type) {
4098         case ixgbe_mac_82598EB:
4099                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4100                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4101                 break;
4102         case ixgbe_mac_82599EB:
4103         case ixgbe_mac_X540:
4104         case ixgbe_mac_X550:
4105         case ixgbe_mac_X550EM_x:
4106         case ixgbe_mac_X550EM_a:
4107                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4108                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4109                 break;
4110         default:
4111                 break;
4112         }
4113 }
4114
4115 #define DCB_RX_CONFIG  1
4116 #define DCB_TX_CONFIG  1
4117 #define DCB_TX_PB      1024
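/*
 * DCB_RX_CONFIG / DCB_TX_CONFIG are simple flags recording which direction
 * still needs DCB programming below; DCB_TX_PB (1024) is used as a
 * bytes-to-KB divisor when computing TXPBTHRESH in the config_dcb_tx block.
 */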
4118 /**
4119  * ixgbe_dcb_hw_configure - Enable DCB and configure general DCB
4120  * parameters, in both VT and non-VT mode
4121  * @dev: pointer to rte_eth_dev structure
4122  * @dcb_config: pointer to ixgbe_dcb_config structure
4123  */
4124 static int
4125 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4126                         struct ixgbe_dcb_config *dcb_config)
4127 {
4128         int     ret = 0;
4129         uint8_t i, pfc_en, nb_tcs;
4130         uint16_t pbsize, rx_buffer_size;
4131         uint8_t config_dcb_rx = 0;
4132         uint8_t config_dcb_tx = 0;
4133         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4134         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4135         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4136         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4137         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4138         struct ixgbe_dcb_tc_config *tc;
4139         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4140                 RTE_ETHER_CRC_LEN;
4141         struct ixgbe_hw *hw =
4142                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4143         struct ixgbe_bw_conf *bw_conf =
4144                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4145
4146         switch (dev->data->dev_conf.rxmode.mq_mode) {
4147         case RTE_ETH_MQ_RX_VMDQ_DCB:
4148                 dcb_config->vt_mode = true;
4149                 if (hw->mac.type != ixgbe_mac_82598EB) {
4150                         config_dcb_rx = DCB_RX_CONFIG;
4151                         /*
4152                          * get DCB and VT RX configuration parameters
4153                          * from rte_eth_conf
4154                          */
4155                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4156                         /*Configure general VMDQ and DCB RX parameters*/
4157                         ixgbe_vmdq_dcb_configure(dev);
4158                 }
4159                 break;
4160         case RTE_ETH_MQ_RX_DCB:
4161         case RTE_ETH_MQ_RX_DCB_RSS:
4162                 dcb_config->vt_mode = false;
4163                 config_dcb_rx = DCB_RX_CONFIG;
4164                 /* Get DCB RX configuration parameters from rte_eth_conf */
4165                 ixgbe_dcb_rx_config(dev, dcb_config);
4166                 /*Configure general DCB RX parameters*/
4167                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4168                 break;
4169         default:
4170                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4171                 break;
4172         }
4173         switch (dev->data->dev_conf.txmode.mq_mode) {
4174         case RTE_ETH_MQ_TX_VMDQ_DCB:
4175                 dcb_config->vt_mode = true;
4176                 config_dcb_tx = DCB_TX_CONFIG;
4177                 /* get DCB and VT TX configuration parameters
4178                  * from rte_eth_conf
4179                  */
4180                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4181                 /*Configure general VMDQ and DCB TX parameters*/
4182                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4183                 break;
4184
4185         case RTE_ETH_MQ_TX_DCB:
4186                 dcb_config->vt_mode = false;
4187                 config_dcb_tx = DCB_TX_CONFIG;
4188                 /*get DCB TX configuration parameters from rte_eth_conf*/
4189                 ixgbe_dcb_tx_config(dev, dcb_config);
4190                 /*Configure general DCB TX parameters*/
4191                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4192                 break;
4193         default:
4194                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4195                 break;
4196         }
4197
4198         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4199         /* Unpack map */
4200         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4201         if (nb_tcs == RTE_ETH_4_TCS) {
4202                 /* Avoid unconfigured priorities mapping to TC0 */
4203                 uint8_t j = 4;
4204                 uint8_t mask = 0xFF;
4205
4206                 for (i = 0; i < RTE_ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4207                         mask = (uint8_t)(mask & (~(1 << map[i])));
4208                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4209                         if ((mask & 0x1) && j < RTE_ETH_DCB_NUM_USER_PRIORITIES)
4210                                 map[j++] = i;
4211                         mask >>= 1;
4212                 }
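                /*
                 * Illustration: if priorities 0-3 map to TCs {0,0,1,1},
                 * the mask of unused TCs is 0xFC, so priorities 4-7 are
                 * spread over TCs 2,3,4,5 rather than all landing on TC0.
                 */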
4213                 /* Re-configure 4 TCs BW */
4214                 for (i = 0; i < nb_tcs; i++) {
4215                         tc = &dcb_config->tc_config[i];
4216                         if (bw_conf->tc_num != nb_tcs)
4217                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4218                                         (uint8_t)(100 / nb_tcs);
4219                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4220                                                 (uint8_t)(100 / nb_tcs);
4221                 }
4222                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4223                         tc = &dcb_config->tc_config[i];
4224                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4225                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4226                 }
4227         } else {
4228                 /* Re-configure 8 TCs BW */
4229                 for (i = 0; i < nb_tcs; i++) {
4230                         tc = &dcb_config->tc_config[i];
4231                         if (bw_conf->tc_num != nb_tcs)
4232                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4233                                         (uint8_t)(100 / nb_tcs + (i & 1));
4234                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4235                                 (uint8_t)(100 / nb_tcs + (i & 1));
4236                 }
4237         }
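        /*
         * Note: with 8 TCs, 100 / nb_tcs is 12 and the (i & 1) term
         * alternates 12/13, so the eight shares sum to exactly 100%.
         */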
4238
4239         switch (hw->mac.type) {
4240         case ixgbe_mac_X550:
4241         case ixgbe_mac_X550EM_x:
4242         case ixgbe_mac_X550EM_a:
4243                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4244                 break;
4245         default:
4246                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4247                 break;
4248         }
4249
4250         if (config_dcb_rx) {
4251                 /* Set RX buffer size */
4252                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4253                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4254
4255                 for (i = 0; i < nb_tcs; i++) {
4256                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4257                 }
4258                 /* Zero the buffer size of all unused TCs */
4259                 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++)
4260                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4261         }
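        /*
         * Illustration (assuming the usual 512 KB Rx packet buffer on
         * non-X550 parts): with 4 TCs each enabled TC gets a 128 KB
         * slice and the remaining RXPBSIZE registers are zeroed.
         */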
4262         if (config_dcb_tx) {
4263                 /* Only support an equally distributed
4264                  *  Tx packet buffer strategy.
4265                  */
4266                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4267                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4268
4269                 for (i = 0; i < nb_tcs; i++) {
4270                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4271                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4272                 }
4273                 /* Clear unused TCs, if any, to zero buffer size*/
4274                 for (; i < RTE_ETH_DCB_NUM_USER_PRIORITIES; i++) {
4275                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4276                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4277                 }
4278         }
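        /*
         * Illustration (assuming IXGBE_TXPBSIZE_MAX is 160 KB and
         * IXGBE_TXPKT_SIZE_MAX is 10 KB, as in the base code): with 4 TCs
         * each TC gets a 40 KB Tx packet buffer and a TXPBTHRESH of 30,
         * i.e. the per-TC buffer minus one maximum-sized packet, in KB.
         */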
4279
4280         /*Calculates traffic class credits*/
4281         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4282                                 IXGBE_DCB_TX_CONFIG);
4283         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4284                                 IXGBE_DCB_RX_CONFIG);
4285
4286         if (config_dcb_rx) {
4287                 /* Unpack CEE standard containers */
4288                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4289                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4290                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4291                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4292                 /* Configure PG(ETS) RX */
4293                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4294         }
4295
4296         if (config_dcb_tx) {
4297                 /* Unpack CEE standard containers */
4298                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4299                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4300                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4301                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4302                 /* Configure PG(ETS) TX */
4303                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4304         }
4305
4306         /*Configure queue statistics registers*/
4307         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4308
4309         /* Check if the PFC is supported */
4310         if (dev->data->dev_conf.dcb_capability_en & RTE_ETH_DCB_PFC_SUPPORT) {
4311                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4312                 for (i = 0; i < nb_tcs; i++) {
4313                         /*
4314                         * e.g. with 8 TCs and the default buffer size, high_water
4315                         * defaults to 48 and low_water to 16 (3/4 and 1/4 of pbsize).
4316                         */
4317                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4318                         hw->fc.low_water[i] = pbsize / 4;
4319                         /* Enable pfc for this TC */
4320                         tc = &dcb_config->tc_config[i];
4321                         tc->pfc = ixgbe_dcb_pfc_enabled;
4322                 }
4323                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4324                 if (dcb_config->num_tcs.pfc_tcs == RTE_ETH_4_TCS)
4325                         pfc_en &= 0x0F;
4326                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4327         }
4328
4329         return ret;
4330 }
4331
4332 /**
4333  * ixgbe_configure_dcb - Configure DCB  Hardware
4334  * @dev: pointer to rte_eth_dev
4335  */
4336 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4337 {
4338         struct ixgbe_dcb_config *dcb_cfg =
4339                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4340         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4341
4342         PMD_INIT_FUNC_TRACE();
4343
4344         /* check support mq_mode for DCB */
4345         if (dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_VMDQ_DCB &&
4346             dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB &&
4347             dev_conf->rxmode.mq_mode != RTE_ETH_MQ_RX_DCB_RSS)
4348                 return;
4349
4350         if (dev->data->nb_rx_queues > RTE_ETH_DCB_NUM_QUEUES)
4351                 return;
4352
4353         /** Configure DCB hardware **/
4354         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4355 }
4356
4357 /*
4358  * VMDq is only supported on 10 GbE NICs.
4359  */
4360 static void
4361 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4362 {
4363         struct rte_eth_vmdq_rx_conf *cfg;
4364         struct ixgbe_hw *hw;
4365         enum rte_eth_nb_pools num_pools;
4366         uint32_t mrqc, vt_ctl, vlanctrl;
4367         uint32_t vmolr = 0;
4368         int i;
4369
4370         PMD_INIT_FUNC_TRACE();
4371         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4372         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4373         num_pools = cfg->nb_queue_pools;
4374
4375         ixgbe_rss_disable(dev);
4376
4377         /* MRQC: enable vmdq */
4378         mrqc = IXGBE_MRQC_VMDQEN;
4379         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4380
4381         /* PFVTCTL: turn on virtualisation and set the default pool */
4382         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4383         if (cfg->enable_default_pool)
4384                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4385         else
4386                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4387
4388         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4389
4390         for (i = 0; i < (int)num_pools; i++) {
4391                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4392                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4393         }
4394
4395         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4396         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4397         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4398         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4399
4400         /* VFTA - enable all vlan filters */
4401         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4402                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4403
4404         /* VFRE: pool enabling for receive - 64 */
4405         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4406         if (num_pools == RTE_ETH_64_POOLS)
4407                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4408
4409         /*
4410          * MPSAR - allow pools to read specific mac addresses
4411          * In this case, all pools should be able to read from mac addr 0
4412          */
4413         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4414         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4415
4416         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4417         for (i = 0; i < cfg->nb_pool_maps; i++) {
4418                 /* set vlan id in VF register and set the valid bit */
4419                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4420                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4421                 /*
4422                  * Put the allowed pools in the VLVFB register pair: pools 0-31
4423                  * go into the even register, pools 32-63 (if any are set) into
4424                  * the odd register.
4425                  */
4426                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4427                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4428                                         (cfg->pool_map[i].pools & UINT32_MAX));
4429                 else
4430                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4431                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4432
4433         }
4434
4435         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4436         if (cfg->enable_loop_back) {
4437                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4438                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4439                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4440         }
4441
4442         IXGBE_WRITE_FLUSH(hw);
4443 }
4444
4445 /*
4446  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4447  * @hw: pointer to hardware structure
4448  */
4449 static void
4450 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4451 {
4452         uint32_t reg;
4453         uint32_t q;
4454
4455         PMD_INIT_FUNC_TRACE();
4456         /*PF VF Transmit Enable*/
4457         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4458         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4459
4460         /* Disable the Tx desc arbiter so that MTQC can be changed */
4461         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4462         reg |= IXGBE_RTTDCS_ARBDIS;
4463         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4464
4465         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4466         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4467
4468         /* Disable drop for all queues */
4469         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4470                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4471                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4472
4473         /* Enable the Tx desc arbiter */
4474         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4475         reg &= ~IXGBE_RTTDCS_ARBDIS;
4476         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4477
4478         IXGBE_WRITE_FLUSH(hw);
4479 }
4480
4481 static int __rte_cold
4482 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4483 {
4484         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4485         uint64_t dma_addr;
4486         unsigned int i;
4487
4488         /* Initialize software ring entries */
4489         for (i = 0; i < rxq->nb_rx_desc; i++) {
4490                 volatile union ixgbe_adv_rx_desc *rxd;
4491                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4492
4493                 if (mbuf == NULL) {
4494                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4495                                      (unsigned) rxq->queue_id);
4496                         return -ENOMEM;
4497                 }
4498
4499                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4500                 mbuf->port = rxq->port_id;
4501
4502                 dma_addr =
4503                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4504                 rxd = &rxq->rx_ring[i];
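                /*
                 * Zero hdr_addr: header split is not used, and clearing
                 * the second quadword also wipes any stale write-back
                 * status (e.g. the DD bit) from a previous run.
                 */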
4505                 rxd->read.hdr_addr = 0;
4506                 rxd->read.pkt_addr = dma_addr;
4507                 rxe[i].mbuf = mbuf;
4508         }
4509
4510         return 0;
4511 }
4512
4513 static int
4514 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4515 {
4516         struct ixgbe_hw *hw;
4517         uint32_t mrqc;
4518
4519         ixgbe_rss_configure(dev);
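        /*
         * ixgbe_rss_configure() above is expected to program the RSS hash
         * key and redirection table; the MRQC update below then switches
         * the engine to the pool-aware (VMDq + RSS) mode. (Ordering
         * assumption based on the surrounding code.)
         */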
4520
4521         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4522
4523         /* MRQC: enable VF RSS */
4524         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4525         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4526         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4527         case RTE_ETH_64_POOLS:
4528                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4529                 break;
4530
4531         case RTE_ETH_32_POOLS:
4532                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4533                 break;
4534
4535         default:
4536                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4537                 return -EINVAL;
4538         }
4539
4540         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4541
4542         return 0;
4543 }
4544
4545 static int
4546 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4547 {
4548         struct ixgbe_hw *hw =
4549                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4550
4551         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4552         case RTE_ETH_64_POOLS:
4553                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4554                         IXGBE_MRQC_VMDQEN);
4555                 break;
4556
4557         case RTE_ETH_32_POOLS:
4558                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4559                         IXGBE_MRQC_VMDQRT4TCEN);
4560                 break;
4561
4562         case RTE_ETH_16_POOLS:
4563                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4564                         IXGBE_MRQC_VMDQRT8TCEN);
4565                 break;
4566         default:
4567                 PMD_INIT_LOG(ERR,
4568                         "invalid pool number in IOV mode");
4569                 break;
4570         }
4571         return 0;
4572 }
4573
4574 static int
4575 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4576 {
4577         struct ixgbe_hw *hw =
4578                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4579
4580         if (hw->mac.type == ixgbe_mac_82598EB)
4581                 return 0;
4582
4583         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4584                 /*
4585                  * SRIOV inactive scheme
4586                  * any DCB/RSS w/o VMDq multi-queue setting
4587                  */
4588                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4589                 case RTE_ETH_MQ_RX_RSS:
4590                 case RTE_ETH_MQ_RX_DCB_RSS:
4591                 case RTE_ETH_MQ_RX_VMDQ_RSS:
4592                         ixgbe_rss_configure(dev);
4593                         break;
4594
4595                 case RTE_ETH_MQ_RX_VMDQ_DCB:
4596                         ixgbe_vmdq_dcb_configure(dev);
4597                         break;
4598
4599                 case RTE_ETH_MQ_RX_VMDQ_ONLY:
4600                         ixgbe_vmdq_rx_hw_configure(dev);
4601                         break;
4602
4603                 case RTE_ETH_MQ_RX_NONE:
4604                 default:
4605                         /* if mq_mode is none, disable rss mode.*/
4606                         ixgbe_rss_disable(dev);
4607                         break;
4608                 }
4609         } else {
4610                 /* SRIOV active scheme
4611                  * Support RSS together with SRIOV.
4612                  */
4613                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4614                 case RTE_ETH_MQ_RX_RSS:
4615                 case RTE_ETH_MQ_RX_VMDQ_RSS:
4616                         ixgbe_config_vf_rss(dev);
4617                         break;
4618                 case RTE_ETH_MQ_RX_VMDQ_DCB:
4619                 case RTE_ETH_MQ_RX_DCB:
4620                 /* In SRIOV, the configuration is the same as VMDq case */
4621                         ixgbe_vmdq_dcb_configure(dev);
4622                         break;
4623                 /* DCB/RSS together with SRIOV is not supported */
4624                 case RTE_ETH_MQ_RX_VMDQ_DCB_RSS:
4625                 case RTE_ETH_MQ_RX_DCB_RSS:
4626                         PMD_INIT_LOG(ERR,
4627                                 "Could not support DCB/RSS with VMDq & SRIOV");
4628                         return -1;
4629                 default:
4630                         ixgbe_config_vf_default(dev);
4631                         break;
4632                 }
4633         }
4634
4635         return 0;
4636 }
4637
4638 static int
4639 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4640 {
4641         struct ixgbe_hw *hw =
4642                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4643         uint32_t mtqc;
4644         uint32_t rttdcs;
4645
4646         if (hw->mac.type == ixgbe_mac_82598EB)
4647                 return 0;
4648
4649         /* disable arbiter before setting MTQC */
4650         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4651         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4652         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4653
4654         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4655                 /*
4656                  * SRIOV inactive scheme
4657                  * any DCB w/o VMDq multi-queue setting
4658                  */
4659                 if (dev->data->dev_conf.txmode.mq_mode == RTE_ETH_MQ_TX_VMDQ_ONLY)
4660                         ixgbe_vmdq_tx_hw_configure(hw);
4661                 else {
4662                         mtqc = IXGBE_MTQC_64Q_1PB;
4663                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4664                 }
4665         } else {
4666                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4667
4668                 /*
4669                  * SRIOV active scheme
4670                  * FIXME if support DCB together with VMDq & SRIOV
4671                  */
4672                 case RTE_ETH_64_POOLS:
4673                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4674                         break;
4675                 case RTE_ETH_32_POOLS:
4676                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4677                         break;
4678                 case RTE_ETH_16_POOLS:
4679                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4680                                 IXGBE_MTQC_8TC_8TQ;
4681                         break;
4682                 default:
4683                         mtqc = IXGBE_MTQC_64Q_1PB;
4684                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4685                 }
4686                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4687         }
4688
4689         /* re-enable arbiter */
4690         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4691         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4692
4693         return 0;
4694 }
4695
4696 /**
4697  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4698  *
4699  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4700  * spec rev. 3.0 chapter 8.2.3.8.13.
4701  *
4702  * @pool Memory pool of the Rx queue
4703  */
4704 static inline uint32_t
4705 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4706 {
4707         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4708
4709         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4710         uint16_t maxdesc =
4711                 RTE_IPV4_MAX_PKT_LEN /
4712                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
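        /*
         * Example (assuming the common 2 KB + headroom mempool layout):
         * the divisor is 2048 bytes, maxdesc = 65535 / 2048 = 31, and
         * MAXDESC_16 is selected, keeping MAXDESC * BSIZEPKT below 64 KB.
         */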
4713
4714         if (maxdesc >= 16)
4715                 return IXGBE_RSCCTL_MAXDESC_16;
4716         else if (maxdesc >= 8)
4717                 return IXGBE_RSCCTL_MAXDESC_8;
4718         else if (maxdesc >= 4)
4719                 return IXGBE_RSCCTL_MAXDESC_4;
4720         else
4721                 return IXGBE_RSCCTL_MAXDESC_1;
4722 }
4723
4724 /**
4725  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4726  * interrupt
4727  *
4728  * (Taken from FreeBSD tree)
4729  * (yes this is all very magic and confusing :)
4730  *
4731  * @dev port handle
4732  * @entry the register array entry
4733  * @vector the MSIX vector for this queue
4734  * @type RX/TX/MISC
4735  */
4736 static void
4737 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4738 {
4739         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4740         u32 ivar, index;
4741
4742         vector |= IXGBE_IVAR_ALLOC_VAL;
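        /*
         * IXGBE_IVAR_ALLOC_VAL sets the "valid" bit of the 8-bit IVAR
         * entry (per the datasheet); without it the hardware ignores
         * the vector assignment.
         */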
4743
4744         switch (hw->mac.type) {
4745
4746         case ixgbe_mac_82598EB:
4747                 if (type == -1)
4748                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4749                 else
4750                         entry += (type * 64);
4751                 index = (entry >> 2) & 0x1F;
4752                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4753                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4754                 ivar |= (vector << (8 * (entry & 0x3)));
4755                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4756                 break;
4757
4758         case ixgbe_mac_82599EB:
4759         case ixgbe_mac_X540:
4760                 if (type == -1) { /* MISC IVAR */
4761                         index = (entry & 1) * 8;
4762                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4763                         ivar &= ~(0xFF << index);
4764                         ivar |= (vector << index);
4765                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4766                 } else {        /* RX/TX IVARS */
4767                         index = (16 * (entry & 1)) + (8 * type);
4768                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4769                         ivar &= ~(0xFF << index);
4770                         ivar |= (vector << index);
4771                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4772                 }
4773
4774                 break;
4775
4776         default:
4777                 break;
4778         }
4779 }
4780
4781 void __rte_cold
4782 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4783 {
4784         uint16_t i, rx_using_sse;
4785         struct ixgbe_adapter *adapter = dev->data->dev_private;
4786
4787         /*
4788          * In order to allow Vector Rx there are a few configuration
4789          * conditions to be met and Rx Bulk Allocation should be allowed.
4790          */
4791         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4792             !adapter->rx_bulk_alloc_allowed ||
4793                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4794                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4795                                     "preconditions",
4796                              dev->data->port_id);
4797
4798                 adapter->rx_vec_allowed = false;
4799         }
4800
4801         /*
4802          * Initialize the appropriate LRO callback.
4803          *
4804          * If all queues satisfy the bulk allocation preconditions
4805          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4806          * Otherwise use a single allocation version.
4807          */
4808         if (dev->data->lro) {
4809                 if (adapter->rx_bulk_alloc_allowed) {
4810                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4811                                            "allocation version");
4812                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4813                 } else {
4814                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4815                                            "allocation version");
4816                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4817                 }
4818         } else if (dev->data->scattered_rx) {
4819                 /*
4820                  * Set the non-LRO scattered callback: there are Vector and
4821                  * single allocation versions.
4822                  */
4823                 if (adapter->rx_vec_allowed) {
4824                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4825                                             "callback (port=%d).",
4826                                      dev->data->port_id);
4827
4828                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4829                 } else if (adapter->rx_bulk_alloc_allowed) {
4830                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4831                                            "allocation callback (port=%d).",
4832                                      dev->data->port_id);
4833                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4834                 } else {
4835                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4836                                             "single allocation) "
4837                                             "Scattered Rx callback "
4838                                             "(port=%d).",
4839                                      dev->data->port_id);
4840
4841                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4842                 }
4843         /*
4844          * Below we set "simple" callbacks according to port/queues parameters.
4845          * If parameters allow we are going to choose between the following
4846          * callbacks:
4847          *    - Vector
4848          *    - Bulk Allocation
4849          *    - Single buffer allocation (the simplest one)
4850          */
4851         } else if (adapter->rx_vec_allowed) {
4852                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4853                                     "burst size is no less than %d (port=%d).",
4854                              RTE_IXGBE_DESCS_PER_LOOP,
4855                              dev->data->port_id);
4856
4857                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4858         } else if (adapter->rx_bulk_alloc_allowed) {
4859                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4860                                     "satisfied. Rx Burst Bulk Alloc function "
4861                                     "will be used on port=%d.",
4862                              dev->data->port_id);
4863
4864                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4865         } else {
4866                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4867                                     "satisfied, or Scattered Rx is requested "
4868                                     "(port=%d).",
4869                              dev->data->port_id);
4870
4871                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4872         }
4873
4874         /* Propagate information about RX function choice through all queues. */
4875
4876         rx_using_sse =
4877                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4878                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4879
4880         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4881                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4882
4883                 rxq->rx_using_sse = rx_using_sse;
4884 #ifdef RTE_LIB_SECURITY
4885                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4886                                 RTE_ETH_RX_OFFLOAD_SECURITY);
4887 #endif
4888         }
4889 }
4890
4891 /**
4892  * ixgbe_set_rsc - configure RSC related port HW registers
4893  *
4894  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4895  * of 82599 Spec (x540 configuration is virtually the same).
4896  *
4897  * @dev port handle
4898  *
4899  * Returns 0 in case of success or a non-zero error code
4900  */
4901 static int
4902 ixgbe_set_rsc(struct rte_eth_dev *dev)
4903 {
4904         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4905         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4906         struct rte_eth_dev_info dev_info = { 0 };
4907         bool rsc_capable = false;
4908         uint16_t i;
4909         uint32_t rdrxctl;
4910         uint32_t rfctl;
4911
4912         /* Sanity check */
4913         dev->dev_ops->dev_infos_get(dev, &dev_info);
4914         if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
4915                 rsc_capable = true;
4916
4917         if (!rsc_capable && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4918                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4919                                    "support it");
4920                 return -EINVAL;
4921         }
4922
4923         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4924
4925         if ((rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) &&
4926              (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO)) {
4927                 /*
4928                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4929                  * 3.0 RSC configuration requires HW CRC stripping being
4930                  * enabled. If user requested both HW CRC stripping off
4931                  * and RSC on - return an error.
4932                  */
4933                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4934                                     "is disabled");
4935                 return -EINVAL;
4936         }
4937
4938         /* RFCTL configuration  */
4939         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4940         if ((rsc_capable) && (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4941                 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4942         else
4943                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4944         /* disable NFS filtering */
4945         rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4946         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4947
4948         /* If LRO hasn't been requested - we are done here. */
4949         if (!(rx_conf->offloads & RTE_ETH_RX_OFFLOAD_TCP_LRO))
4950                 return 0;
4951
4952         /* Set RDRXCTL.RSCACKC bit */
4953         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4954         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4955         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4956
4957         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4958         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4959                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4960                 uint32_t srrctl =
4961                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4962                 uint32_t rscctl =
4963                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4964                 uint32_t psrtype =
4965                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4966                 uint32_t eitr =
4967                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4968
4969                 /*
4970                  * ixgbe PMD doesn't support header-split at the moment.
4971                  *
4972                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4973                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4974                  * should be configured even if header split is not
4975                  * enabled. We will configure it to 128 bytes, following the
4976                  * recommendation in the spec.
4977                  */
4978                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4979                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4980                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4981
4982                 /*
4983                  * TODO: Consider setting the Receive Descriptor Minimum
4984                  * Threshold Size for an RSC case. This is not an obviously
4985                  * beneficiary option but the one worth considering...
4986                  * beneficial option, but one worth considering...
4987
4988                 rscctl |= IXGBE_RSCCTL_RSCEN;
4989                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4990                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4991
4992                 /*
4993                  * RSC: Set ITR interval corresponding to 2K ints/s.
4994                  *
4995                  * Full-sized RSC aggregations for a 10Gb/s link will
4996                  * arrive at about 20K aggregation/s rate.
4997                  *
4998                  * A 2K ints/s rate will make only 10% of the
4999                  * aggregations to be closed due to the interrupt timer
5000                  * expiration for a streaming at wire-speed case.
5001                  *
5002                  * For a sparse streaming case this setting will yield
5003                  * at most 500us latency for a single RSC aggregation.
5004                  */
5005                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5006                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5007                 eitr |= IXGBE_EITR_CNT_WDIS;
5008
5009                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5010                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5011                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5012                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5013
5014                 /*
5015                  * RSC requires the mapping of the queue to the
5016                  * interrupt vector.
5017                  */
5018                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5019         }
5020
5021         dev->data->lro = 1;
5022
5023         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5024
5025         return 0;
5026 }
5027
5028 /*
5029  * Initializes Receive Unit.
5030  */
5031 int __rte_cold
5032 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5033 {
5034         struct ixgbe_hw     *hw;
5035         struct ixgbe_rx_queue *rxq;
5036         uint64_t bus_addr;
5037         uint32_t rxctrl;
5038         uint32_t fctrl;
5039         uint32_t hlreg0;
5040         uint32_t maxfrs;
5041         uint32_t srrctl;
5042         uint32_t rdrxctl;
5043         uint32_t rxcsum;
5044         uint16_t buf_size;
5045         uint16_t i;
5046         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5047         uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5048         int rc;
5049
5050         PMD_INIT_FUNC_TRACE();
5051         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5052
5053         /*
5054          * Make sure receives are disabled while setting
5055          * up the RX context (registers, descriptor rings, etc.).
5056          */
5057         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5058         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5059
5060         /* Enable receipt of broadcast frames */
5061         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5062         fctrl |= IXGBE_FCTRL_BAM;
5063         fctrl |= IXGBE_FCTRL_DPF;
5064         fctrl |= IXGBE_FCTRL_PMCF;
5065         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5066
5067         /*
5068          * Configure CRC stripping, if any.
5069          */
5070         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5071         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5072                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5073         else
5074                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5075
5076         /*
5077          * Configure jumbo frame support, if any.
5078          */
5079         if (dev->data->mtu > RTE_ETHER_MTU) {
5080                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5081                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5082                 maxfrs &= 0x0000FFFF;
5083                 maxfrs |= (frame_size << 16);
5084                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5085         } else
5086                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5087
5088         /*
5089          * If loopback mode is configured, set LPBK bit.
5090          */
5091         if (dev->data->dev_conf.lpbk_mode != 0) {
5092                 rc = ixgbe_check_supported_loopback_mode(dev);
5093                 if (rc < 0) {
5094                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5095                         return rc;
5096                 }
5097                 hlreg0 |= IXGBE_HLREG0_LPBK;
5098         } else {
5099                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5100         }
5101
5102         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5103
5104         /*
5105          * Assume no header split and no VLAN strip support
5106          * on any Rx queue first.
5107          */
5108         rx_conf->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5109         /* Setup RX queues */
5110         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5111                 rxq = dev->data->rx_queues[i];
5112
5113                 /*
5114                  * Reset crc_len in case it was changed after queue setup by a
5115                  * call to configure.
5116                  */
5117                 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5118                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5119                 else
5120                         rxq->crc_len = 0;
5121
5122                 /* Setup the Base and Length of the Rx Descriptor Rings */
5123                 bus_addr = rxq->rx_ring_phys_addr;
5124                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5125                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5126                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5127                                 (uint32_t)(bus_addr >> 32));
5128                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5129                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5130                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5131                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5132
5133                 /* Configure the SRRCTL register */
5134                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5135
5136                 /* Set if packets are dropped when no descriptors available */
5137                 if (rxq->drop_en)
5138                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5139
5140                 /*
5141                  * Configure the RX buffer size in the BSIZEPACKET field of
5142                  * the SRRCTL register of the queue.
5143                  * The value is in 1 KB resolution. Valid values can be from
5144                  * 1 KB to 16 KB.
5145                  */
5146                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5147                         RTE_PKTMBUF_HEADROOM);
5148                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5149                            IXGBE_SRRCTL_BSIZEPKT_MASK);
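                /*
                 * For example, a 2 KB data room left after the mbuf headroom
                 * gives buf_size = 2048 and a BSIZEPACKET value of 2, i.e. a
                 * 2 KB hardware buffer.
                 */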
5150
5151                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5152
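                /*
                 * Recompute the effective buffer size from the value just
                 * programmed, i.e. rounded down to 1 KB granularity.
                 */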
5153                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5154                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5155
5156                 /* Add double VLAN tag length to support dual VLAN (QinQ) frames */
5157                 if (frame_size + 2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5158                         dev->data->scattered_rx = 1;
5159                 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5160                         rx_conf->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5161         }
5162
5163         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_SCATTER)
5164                 dev->data->scattered_rx = 1;
5165
5166         /*
5167          * Device configured with multiple RX queues.
5168          */
5169         ixgbe_dev_mq_rx_configure(dev);
5170
5171         /*
5172          * Setup the Checksum Register.
5173          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5174          * Enable IP/L4 checksum computation by hardware if requested to do so.
5175          */
5176         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5177         rxcsum |= IXGBE_RXCSUM_PCSD;
5178         if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
5179                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5180         else
5181                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5182
5183         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5184
5185         if (hw->mac.type == ixgbe_mac_82599EB ||
5186             hw->mac.type == ixgbe_mac_X540) {
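                /*
                 * Keep RDRXCTL.CRCSTRIP consistent with the HLREG0.RXCRCSTRP
                 * setting programmed above; the hardware expects the two to
                 * match.
                 */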
5187                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5188                 if (rx_conf->offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
5189                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5190                 else
5191                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5192                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5193                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5194         }
5195
5196         rc = ixgbe_set_rsc(dev);
5197         if (rc)
5198                 return rc;
5199
5200         ixgbe_set_rx_function(dev);
5201
5202         return 0;
5203 }
5204
5205 /*
5206  * Initializes Transmit Unit.
5207  */
5208 void __rte_cold
5209 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5210 {
5211         struct ixgbe_hw     *hw;
5212         struct ixgbe_tx_queue *txq;
5213         uint64_t bus_addr;
5214         uint32_t hlreg0;
5215         uint32_t txctrl;
5216         uint16_t i;
5217
5218         PMD_INIT_FUNC_TRACE();
5219         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5220
5221         /* Enable TX CRC (checksum offload requirement) and hw padding
5222          * (TSO requirement)
5223          */
5224         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5225         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5226         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5227
5228         /* Setup the Base and Length of the Tx Descriptor Rings */
5229         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5230                 txq = dev->data->tx_queues[i];
5231
5232                 bus_addr = txq->tx_ring_phys_addr;
5233                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5234                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5235                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5236                                 (uint32_t)(bus_addr >> 32));
5237                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5238                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5239                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5240                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5241                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5242
5243                 /*
5244                  * Disable Tx Head Writeback RO bit, since this hoses
5245                  * bookkeeping if things aren't delivered in order.
5246                  */
5247                 switch (hw->mac.type) {
5248                 case ixgbe_mac_82598EB:
5249                         txctrl = IXGBE_READ_REG(hw,
5250                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5251                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5252                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5253                                         txctrl);
5254                         break;
5255
5256                 case ixgbe_mac_82599EB:
5257                 case ixgbe_mac_X540:
5258                 case ixgbe_mac_X550:
5259                 case ixgbe_mac_X550EM_x:
5260                 case ixgbe_mac_X550EM_a:
5261                 default:
5262                         txctrl = IXGBE_READ_REG(hw,
5263                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5264                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5265                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5266                                         txctrl);
5267                         break;
5268                 }
5269         }
5270
5271         /* Device configured with multiple TX queues. */
5272         ixgbe_dev_mq_tx_configure(dev);
5273 }
5274
5275 /*
5276  * Check if requested loopback mode is supported
5277  */
5278 int
5279 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5280 {
5281         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5282
5283         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5284                 if (hw->mac.type == ixgbe_mac_82599EB ||
5285                      hw->mac.type == ixgbe_mac_X540 ||
5286                      hw->mac.type == ixgbe_mac_X550 ||
5287                      hw->mac.type == ixgbe_mac_X550EM_x ||
5288                      hw->mac.type == ixgbe_mac_X550EM_a)
5289                         return 0;
5290
5291         return -ENOTSUP;
5292 }
5293
5294 /*
5295  * Set up link for 82599 loopback mode Tx->Rx.
5296  */
5297 static inline void __rte_cold
5298 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5299 {
5300         PMD_INIT_FUNC_TRACE();
5301
5302         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5303                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5304                                 IXGBE_SUCCESS) {
5305                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5306                         /* ignore error */
5307                         return;
5308                 }
5309         }
5310
5311         /* Restart link */
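        /* FLU forces the link up; LMS_10G_LINK_NO_AN selects 10G without auto-negotiation. */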
5312         IXGBE_WRITE_REG(hw,
5313                         IXGBE_AUTOC,
5314                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5315         ixgbe_reset_pipeline_82599(hw);
5316
5317         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5318         msec_delay(50);
5319 }
5320
5321
5322 /*
5323  * Start Transmit and Receive Units.
5324  */
5325 int __rte_cold
5326 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5327 {
5328         struct ixgbe_hw     *hw;
5329         struct ixgbe_tx_queue *txq;
5330         struct ixgbe_rx_queue *rxq;
5331         uint32_t txdctl;
5332         uint32_t dmatxctl;
5333         uint32_t rxctrl;
5334         uint16_t i;
5335         int ret = 0;
5336
5337         PMD_INIT_FUNC_TRACE();
5338         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5339
5340         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5341                 txq = dev->data->tx_queues[i];
5342                 /* Setup Transmit Threshold Registers */
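                /* PTHRESH occupies bits [6:0], HTHRESH bits [14:8] and WTHRESH bits [22:16]. */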
5343                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5344                 txdctl |= txq->pthresh & 0x7F;
5345                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5346                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5347                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5348         }
5349
5350         if (hw->mac.type != ixgbe_mac_82598EB) {
5351                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5352                 dmatxctl |= IXGBE_DMATXCTL_TE;
5353                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5354         }
5355
5356         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5357                 txq = dev->data->tx_queues[i];
5358                 if (!txq->tx_deferred_start) {
5359                         ret = ixgbe_dev_tx_queue_start(dev, i);
5360                         if (ret < 0)
5361                                 return ret;
5362                 }
5363         }
5364
5365         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5366                 rxq = dev->data->rx_queues[i];
5367                 if (!rxq->rx_deferred_start) {
5368                         ret = ixgbe_dev_rx_queue_start(dev, i);
5369                         if (ret < 0)
5370                                 return ret;
5371                 }
5372         }
5373
5374         /* Enable Receive engine */
5375         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5376         if (hw->mac.type == ixgbe_mac_82598EB)
5377                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5378         rxctrl |= IXGBE_RXCTRL_RXEN;
5379         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5380
5381         /* If loopback mode is enabled, set up the link accordingly */
5382         if (dev->data->dev_conf.lpbk_mode != 0) {
5383                 if (hw->mac.type == ixgbe_mac_82599EB)
5384                         ixgbe_setup_loopback_link_82599(hw);
5385                 else if (hw->mac.type == ixgbe_mac_X540 ||
5386                      hw->mac.type == ixgbe_mac_X550 ||
5387                      hw->mac.type == ixgbe_mac_X550EM_x ||
5388                      hw->mac.type == ixgbe_mac_X550EM_a)
5389                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5390         }
5391
5392 #ifdef RTE_LIB_SECURITY
5393         if ((dev->data->dev_conf.rxmode.offloads &
5394                         RTE_ETH_RX_OFFLOAD_SECURITY) ||
5395                 (dev->data->dev_conf.txmode.offloads &
5396                         RTE_ETH_TX_OFFLOAD_SECURITY)) {
5397                 ret = ixgbe_crypto_enable_ipsec(dev);
5398                 if (ret != 0) {
5399                         PMD_DRV_LOG(ERR,
5400                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5401                                     ret);
5402                         return ret;
5403                 }
5404         }
5405 #endif
5406
5407         return 0;
5408 }
5409
5410 /*
5411  * Start Receive Units for specified queue.
5412  */
5413 int __rte_cold
5414 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5415 {
5416         struct ixgbe_hw     *hw;
5417         struct ixgbe_rx_queue *rxq;
5418         uint32_t rxdctl;
5419         int poll_ms;
5420
5421         PMD_INIT_FUNC_TRACE();
5422         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5423
5424         rxq = dev->data->rx_queues[rx_queue_id];
5425
5426         /* Allocate buffers for descriptor rings */
5427         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5428                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5429                              rx_queue_id);
5430                 return -1;
5431         }
5432         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5433         rxdctl |= IXGBE_RXDCTL_ENABLE;
5434         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5435
5436         /* Wait until RX Enable ready */
5437         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5438         do {
5439                 rte_delay_ms(1);
5440                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5441         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5442         if (!poll_ms)
5443                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5444         rte_wmb();
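        /* Publish the ring to the hardware: head at 0, tail at the last descriptor. */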
5445         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5446         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5447         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5448
5449         return 0;
5450 }
5451
5452 /*
5453  * Stop Receive Units for specified queue.
5454  */
5455 int __rte_cold
5456 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5457 {
5458         struct ixgbe_hw     *hw;
5459         struct ixgbe_adapter *adapter = dev->data->dev_private;
5460         struct ixgbe_rx_queue *rxq;
5461         uint32_t rxdctl;
5462         int poll_ms;
5463
5464         PMD_INIT_FUNC_TRACE();
5465         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5466
5467         rxq = dev->data->rx_queues[rx_queue_id];
5468
5469         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5470         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5471         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5472
5473         /* Wait until RX Enable bit clear */
5474         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5475         do {
5476                 rte_delay_ms(1);
5477                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5478         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5479         if (!poll_ms)
5480                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5481
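        /* Give any in-flight DMA time to complete before the mbufs are released. */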
5482         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5483
5484         ixgbe_rx_queue_release_mbufs(rxq);
5485         ixgbe_reset_rx_queue(adapter, rxq);
5486         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5487
5488         return 0;
5489 }
5490
5491
5492 /*
5493  * Start Transmit Units for specified queue.
5494  */
5495 int __rte_cold
5496 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5497 {
5498         struct ixgbe_hw     *hw;
5499         struct ixgbe_tx_queue *txq;
5500         uint32_t txdctl;
5501         int poll_ms;
5502
5503         PMD_INIT_FUNC_TRACE();
5504         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5505
5506         txq = dev->data->tx_queues[tx_queue_id];
5507         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5508         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5509         txdctl |= IXGBE_TXDCTL_ENABLE;
5510         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5511
5512         /* Wait until TX Enable ready */
5513         if (hw->mac.type == ixgbe_mac_82599EB) {
5514                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5515                 do {
5516                         rte_delay_ms(1);
5517                         txdctl = IXGBE_READ_REG(hw,
5518                                 IXGBE_TXDCTL(txq->reg_idx));
5519                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5520                 if (!poll_ms)
5521                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5522                                 tx_queue_id);
5523         }
5524         rte_wmb();
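        /* The ring starts empty, so the tail is set to 0 to match the head programmed above. */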
5525         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5526         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5527
5528         return 0;
5529 }
5530
5531 /*
5532  * Stop Transmit Units for specified queue.
5533  */
5534 int __rte_cold
5535 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5536 {
5537         struct ixgbe_hw     *hw;
5538         struct ixgbe_tx_queue *txq;
5539         uint32_t txdctl;
5540         uint32_t txtdh, txtdt;
5541         int poll_ms;
5542
5543         PMD_INIT_FUNC_TRACE();
5544         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5545
5546         txq = dev->data->tx_queues[tx_queue_id];
5547
5548         /* Wait until TX queue is empty */
5549         if (hw->mac.type == ixgbe_mac_82599EB) {
5550                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5551                 do {
5552                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5553                         txtdh = IXGBE_READ_REG(hw,
5554                                                IXGBE_TDH(txq->reg_idx));
5555                         txtdt = IXGBE_READ_REG(hw,
5556                                                IXGBE_TDT(txq->reg_idx));
5557                 } while (--poll_ms && (txtdh != txtdt));
5558                 if (!poll_ms)
5559                         PMD_INIT_LOG(ERR,
5560                                 "Tx Queue %d is not empty when stopping.",
5561                                 tx_queue_id);
5562         }
5563
5564         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5565         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5566         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5567
5568         /* Wait until TX Enable bit clear */
5569         if (hw->mac.type == ixgbe_mac_82599EB) {
5570                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5571                 do {
5572                         rte_delay_ms(1);
5573                         txdctl = IXGBE_READ_REG(hw,
5574                                                 IXGBE_TXDCTL(txq->reg_idx));
5575                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5576                 if (!poll_ms)
5577                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5578                                 tx_queue_id);
5579         }
5580
5581         if (txq->ops != NULL) {
5582                 txq->ops->release_mbufs(txq);
5583                 txq->ops->reset(txq);
5584         }
5585         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5586
5587         return 0;
5588 }
5589
5590 void
5591 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5592         struct rte_eth_rxq_info *qinfo)
5593 {
5594         struct ixgbe_rx_queue *rxq;
5595
5596         rxq = dev->data->rx_queues[queue_id];
5597
5598         qinfo->mp = rxq->mb_pool;
5599         qinfo->scattered_rx = dev->data->scattered_rx;
5600         qinfo->nb_desc = rxq->nb_rx_desc;
5601
5602         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5603         qinfo->conf.rx_drop_en = rxq->drop_en;
5604         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5605         qinfo->conf.offloads = rxq->offloads;
5606 }
5607
5608 void
5609 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5610         struct rte_eth_txq_info *qinfo)
5611 {
5612         struct ixgbe_tx_queue *txq;
5613
5614         txq = dev->data->tx_queues[queue_id];
5615
5616         qinfo->nb_desc = txq->nb_tx_desc;
5617
5618         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5619         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5620         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5621
5622         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5623         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5624         qinfo->conf.offloads = txq->offloads;
5625         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5626 }
5627
5628 /*
5629  * [VF] Initializes Receive Unit.
5630  */
5631 int __rte_cold
5632 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5633 {
5634         struct ixgbe_hw     *hw;
5635         struct ixgbe_rx_queue *rxq;
5636         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5637         uint32_t frame_size = dev->data->mtu + IXGBE_ETH_OVERHEAD;
5638         uint64_t bus_addr;
5639         uint32_t srrctl, psrtype = 0;
5640         uint16_t buf_size;
5641         uint16_t i;
5642         int ret;
5643
5644         PMD_INIT_FUNC_TRACE();
5645         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5646
5647         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5648                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5649                         "it should be a power of 2");
5650                 return -1;
5651         }
5652
5653         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5654                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5655                         "it should be less than or equal to %d",
5656                         hw->mac.max_rx_queues);
5657                 return -1;
5658         }
5659
5660         /*
5661          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5662          * disables VF packet reception if the PF MTU is > 1500.
5663          * This is done to deal with an 82599 limitation that forces the PF
5664          * and all VFs to share the same MTU.
5665          * The PF driver re-enables VF packet reception only when the VF
5666          * driver issues an IXGBE_VF_SET_LPE request.
5667          * In the meantime, the VF device cannot be used, even if the VF
5668          * driver and the guest VM network stack are ready to accept packets
5669          * of a size up to the PF MTU.
5670          * As a work-around to this PF behaviour, force the call to
5671          * ixgbevf_rlpml_set_vf even if jumbo frames are not used, so that
5672          * VF packet reception works in all cases.
5673          */
5674         if (ixgbevf_rlpml_set_vf(hw, frame_size) != 0)
5675                 PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
5676                              frame_size);
5677
5678         /*
5679          * Assume no header split and no VLAN strip support
5680          * on any Rx queue first.
5681          */
5682         rxmode->offloads &= ~RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5683         /* Setup RX queues */
5684         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5685                 rxq = dev->data->rx_queues[i];
5686
5687                 /* Allocate buffers for descriptor rings */
5688                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5689                 if (ret)
5690                         return ret;
5691
5692                 /* Setup the Base and Length of the Rx Descriptor Rings */
5693                 bus_addr = rxq->rx_ring_phys_addr;
5694
5695                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5696                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5697                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5698                                 (uint32_t)(bus_addr >> 32));
5699                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5700                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5701                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5702                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5703
5704
5705                 /* Configure the SRRCTL register */
5706                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5707
5708                 /* Set if packets are dropped when no descriptors available */
5709                 if (rxq->drop_en)
5710                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5711
5712                 /*
5713                  * Configure the RX buffer size in the BSIZEPACKET field of
5714                  * the SRRCTL register of the queue.
5715                  * The value is in 1 KB resolution. Valid values can be from
5716                  * 1 KB to 16 KB.
5717                  */
5718                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5719                         RTE_PKTMBUF_HEADROOM);
5720                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5721                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5722
5723                 /*
5724                  * VF modification to write virtual function SRRCTL register
5725                  */
5726                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5727
5728                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5729                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5730
5731                 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ||
5732                     /* Add double VLAN tag length to support dual VLAN (QinQ) frames */
5733                     (frame_size + 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5734                         if (!dev->data->scattered_rx)
5735                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5736                         dev->data->scattered_rx = 1;
5737                 }
5738
5739                 if (rxq->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
5740                         rxmode->offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
5741         }
5742
5743         /* Set RQPL for VF RSS according to the number of Rx queues */
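        /*
         * For the power-of-2 queue counts accepted above, nb_rx_queues >> 1
         * produces the RQPL value written to VFPSRTYPE (e.g. 1 for two
         * queues, 2 for four queues).
         */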
5744         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5745                 IXGBE_PSRTYPE_RQPL_SHIFT;
5746         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5747
5748         ixgbe_set_rx_function(dev);
5749
5750         return 0;
5751 }
5752
5753 /*
5754  * [VF] Initializes Transmit Unit.
5755  */
5756 void __rte_cold
5757 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5758 {
5759         struct ixgbe_hw     *hw;
5760         struct ixgbe_tx_queue *txq;
5761         uint64_t bus_addr;
5762         uint32_t txctrl;
5763         uint16_t i;
5764
5765         PMD_INIT_FUNC_TRACE();
5766         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5767
5768         /* Setup the Base and Length of the Tx Descriptor Rings */
5769         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5770                 txq = dev->data->tx_queues[i];
5771                 bus_addr = txq->tx_ring_phys_addr;
5772                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5773                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5774                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5775                                 (uint32_t)(bus_addr >> 32));
5776                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5777                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5778                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5779                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5780                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5781
5782                 /*
5783                  * Disable Tx Head Writeback RO bit, since this hoses
5784                  * bookkeeping if things aren't delivered in order.
5785                  */
5786                 txctrl = IXGBE_READ_REG(hw,
5787                                 IXGBE_VFDCA_TXCTRL(i));
5788                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5789                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5790                                 txctrl);
5791         }
5792 }
5793
5794 /*
5795  * [VF] Start Transmit and Receive Units.
5796  */
5797 void __rte_cold
5798 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5799 {
5800         struct ixgbe_hw     *hw;
5801         struct ixgbe_tx_queue *txq;
5802         struct ixgbe_rx_queue *rxq;
5803         uint32_t txdctl;
5804         uint32_t rxdctl;
5805         uint16_t i;
5806         int poll_ms;
5807
5808         PMD_INIT_FUNC_TRACE();
5809         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5810
5811         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5812                 txq = dev->data->tx_queues[i];
5813                 /* Setup Transmit Threshold Registers */
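                /* Same field layout as on the PF: PTHRESH [6:0], HTHRESH [14:8], WTHRESH [22:16]. */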
5814                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5815                 txdctl |= txq->pthresh & 0x7F;
5816                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5817                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5818                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5819         }
5820
5821         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5822
5823                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5824                 txdctl |= IXGBE_TXDCTL_ENABLE;
5825                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5826
5827                 poll_ms = 10;
5828                 /* Wait until TX Enable ready */
5829                 do {
5830                         rte_delay_ms(1);
5831                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5832                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5833                 if (!poll_ms)
5834                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5835         }
5836         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5837
5838                 rxq = dev->data->rx_queues[i];
5839
5840                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5841                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5842                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5843
5844                 /* Wait until RX Enable ready */
5845                 poll_ms = 10;
5846                 do {
5847                         rte_delay_ms(1);
5848                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5849                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5850                 if (!poll_ms)
5851                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5852                 rte_wmb();
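                /* Advance the tail so the posted descriptors are handed to the hardware. */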
5853                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5854
5855         }
5856 }
5857
5858 int
5859 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5860                     const struct rte_flow_action_rss *in)
5861 {
5862         if (in->key_len > RTE_DIM(out->key) ||
5863             in->queue_num > RTE_DIM(out->queue))
5864                 return -EINVAL;
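        /*
         * memcpy() returns its destination, so .key and .queue below end up
         * pointing at the copies stored inside 'out'.
         */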
5865         out->conf = (struct rte_flow_action_rss){
5866                 .func = in->func,
5867                 .level = in->level,
5868                 .types = in->types,
5869                 .key_len = in->key_len,
5870                 .queue_num = in->queue_num,
5871                 .key = memcpy(out->key, in->key, in->key_len),
5872                 .queue = memcpy(out->queue, in->queue,
5873                                 sizeof(*in->queue) * in->queue_num),
5874         };
5875         return 0;
5876 }
5877
5878 int
5879 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5880                       const struct rte_flow_action_rss *with)
5881 {
5882         return (comp->func == with->func &&
5883                 comp->level == with->level &&
5884                 comp->types == with->types &&
5885                 comp->key_len == with->key_len &&
5886                 comp->queue_num == with->queue_num &&
5887                 !memcmp(comp->key, with->key, with->key_len) &&
5888                 !memcmp(comp->queue, with->queue,
5889                         sizeof(*with->queue) * with->queue_num));
5890 }
5891
5892 int
5893 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5894                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5895 {
5896         struct ixgbe_hw *hw;
5897         uint32_t reta;
5898         uint16_t i;
5899         uint16_t j;
5900         uint16_t sp_reta_size;
5901         uint32_t reta_reg;
5902         struct rte_eth_rss_conf rss_conf = {
5903                 .rss_key = conf->conf.key_len ?
5904                         (void *)(uintptr_t)conf->conf.key : NULL,
5905                 .rss_key_len = conf->conf.key_len,
5906                 .rss_hf = conf->conf.types,
5907         };
5908         struct ixgbe_filter_info *filter_info =
5909                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5910
5911         PMD_INIT_FUNC_TRACE();
5912         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5913
5914         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5915
5916         if (!add) {
5917                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5918                                           &conf->conf)) {
5919                         ixgbe_rss_disable(dev);
5920                         memset(&filter_info->rss_info, 0,
5921                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5922                         return 0;
5923                 }
5924                 return -EINVAL;
5925         }
5926
5927         if (filter_info->rss_info.conf.queue_num)
5928                 return -EINVAL;
5929         /* Fill in redirection table
5930          * The byte-swap is needed because NIC registers are in
5931          * little-endian order.
5932          */
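        /*
         * Four 8-bit queue indices are packed into each 32-bit RETA register;
         * the accumulated value is written out on every fourth entry.
         */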
5933         reta = 0;
5934         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5935                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5936
5937                 if (j == conf->conf.queue_num)
5938                         j = 0;
5939                 reta = (reta << 8) | conf->conf.queue[j];
5940                 if ((i & 3) == 3)
5941                         IXGBE_WRITE_REG(hw, reta_reg,
5942                                         rte_bswap32(reta));
5943         }
5944
5945         /* Configure the RSS key and the RSS protocols used to compute
5946          * the RSS hash of input packets.
5947          */
5948         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5949                 ixgbe_rss_disable(dev);
5950                 return 0;
5951         }
5952         if (rss_conf.rss_key == NULL)
5953                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5954         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5955
5956         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5957                 return -EINVAL;
5958
5959         return 0;
5960 }
5961
5962 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5963 #if defined(RTE_ARCH_PPC_64)
5964 int
5965 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5966 {
5967         return -1;
5968 }
5969
5970 uint16_t
5971 ixgbe_recv_pkts_vec(
5972         void __rte_unused *rx_queue,
5973         struct rte_mbuf __rte_unused **rx_pkts,
5974         uint16_t __rte_unused nb_pkts)
5975 {
5976         return 0;
5977 }
5978
5979 uint16_t
5980 ixgbe_recv_scattered_pkts_vec(
5981         void __rte_unused *rx_queue,
5982         struct rte_mbuf __rte_unused **rx_pkts,
5983         uint16_t __rte_unused nb_pkts)
5984 {
5985         return 0;
5986 }
5987
5988 int
5989 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5990 {
5991         return -1;
5992 }
5993
5994 uint16_t
5995 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5996                 struct rte_mbuf __rte_unused **tx_pkts,
5997                 uint16_t __rte_unused nb_pkts)
5998 {
5999         return 0;
6000 }
6001
6002 int
6003 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
6004 {
6005         return -1;
6006 }
6007
6008 void
6009 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6010 {
6011         return;
6012 }
6013 #endif