net/ixgbe: implement power management API
dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask indicating which bits are required for building the TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (                  \
63                 PKT_TX_OUTER_IPV6 |              \
64                 PKT_TX_OUTER_IPV4 |              \
65                 PKT_TX_IPV6 |                    \
66                 PKT_TX_IPV4 |                    \
67                 PKT_TX_VLAN_PKT |                \
68                 PKT_TX_IP_CKSUM |                \
69                 PKT_TX_L4_MASK |                 \
70                 PKT_TX_TCP_SEG |                 \
71                 PKT_TX_MACSEC |                  \
72                 PKT_TX_OUTER_IP_CKSUM |          \
73                 PKT_TX_SEC_OFFLOAD |     \
74                 IXGBE_TX_IEEE1588_TMST)
75
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
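/*
 * Any mbuf offload flag outside IXGBE_TX_OFFLOAD_MASK is unsupported by this
 * driver; ixgbe_prep_pkts() below uses this complement mask to reject such
 * packets with rte_errno = ENOTSUP before they reach the transmit path.
 */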
78
79 #if 1
80 #define RTE_PMD_USE_PREFETCH
81 #endif
82
83 #ifdef RTE_PMD_USE_PREFETCH
84 /*
85  * Prefetch a cache line into all cache levels.
86  */
87 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
88 #else
89 #define rte_ixgbe_prefetch(p)   do {} while (0)
90 #endif
91
92 /*********************************************************************
93  *
94  *  TX functions
95  *
96  **********************************************************************/
97
98 /*
99  * Check for descriptors with their DD bit set and free mbufs.
100  * Return the total number of buffers freed.
101  */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
104 {
105         struct ixgbe_tx_entry *txep;
106         uint32_t status;
107         int i, nb_free = 0;
108         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
109
110         /* check DD bit on threshold descriptor */
111         status = txq->tx_ring[txq->tx_next_dd].wb.status;
112         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
113                 return 0;
114
115         /*
116          * first buffer to free from S/W ring is at index
117          * tx_next_dd - (tx_rs_thresh-1)
118          */
119         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
120
121         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122                 /* free buffers one at a time */
123                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
124                 txep->mbuf = NULL;
125
126                 if (unlikely(m == NULL))
127                         continue;
128
129                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130                     (nb_free > 0 && m->pool != free[0]->pool)) {
131                         rte_mempool_put_bulk(free[0]->pool,
132                                              (void **)free, nb_free);
133                         nb_free = 0;
134                 }
135
136                 free[nb_free++] = m;
137         }
138
139         if (nb_free > 0)
140                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
141
142         /* buffers were freed, update counters */
143         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
145         if (txq->tx_next_dd >= txq->nb_tx_desc)
146                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
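        /*
         * Illustrative example: with nb_tx_desc = 512 and tx_rs_thresh = 32,
         * tx_next_dd advances 31 -> 63 -> ... -> 511 and then wraps back to
         * 31 (tx_rs_thresh - 1).
         */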
147
148         return txq->tx_rs_thresh;
149 }
150
151 /* Populate 4 descriptors with data from 4 mbufs */
152 static inline void
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
154 {
155         uint64_t buf_dma_addr;
156         uint32_t pkt_len;
157         int i;
158
159         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161                 pkt_len = (*pkts)->data_len;
162
163                 /* write data to descriptor */
164                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
165
166                 txdp->read.cmd_type_len =
167                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
168
169                 txdp->read.olinfo_status =
170                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
171
172                 rte_prefetch0(&(*pkts)->pool);
173         }
174 }
175
176 /* Populate 1 descriptor with data from 1 mbuf */
177 static inline void
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
179 {
180         uint64_t buf_dma_addr;
181         uint32_t pkt_len;
182
183         buf_dma_addr = rte_mbuf_data_iova(*pkts);
184         pkt_len = (*pkts)->data_len;
185
186         /* write data to descriptor */
187         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188         txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190         txdp->read.olinfo_status =
191                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192         rte_prefetch0(&(*pkts)->pool);
193 }
194
195 /*
196  * Fill H/W descriptor ring with mbuf data.
197  * Copy mbuf pointers to the S/W ring.
198  */
199 static inline void
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
201                       uint16_t nb_pkts)
202 {
203         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205         const int N_PER_LOOP = 4;
206         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207         int mainpart, leftover;
208         int i, j;
209
210         /*
211          * Process most of the packets in chunks of N pkts.  Any
212          * leftover packets will get processed one at a time.
213          */
214         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
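        /*
         * Illustrative example: for nb_pkts = 10, mainpart = 8 (handled by
         * two tx4() calls) and leftover = 2 (handled by two tx1() calls).
         */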
216         for (i = 0; i < mainpart; i += N_PER_LOOP) {
217                 /* Copy N mbuf pointers to the S/W ring */
218                 for (j = 0; j < N_PER_LOOP; ++j) {
219                         (txep + i + j)->mbuf = *(pkts + i + j);
220                 }
221                 tx4(txdp + i, pkts + i);
222         }
223
224         if (unlikely(leftover > 0)) {
225                 for (i = 0; i < leftover; ++i) {
226                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227                         tx1(txdp + mainpart + i, pkts + mainpart + i);
228                 }
229         }
230 }
231
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
234              uint16_t nb_pkts)
235 {
236         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
238         uint16_t n = 0;
239
240         /*
241          * Begin scanning the H/W ring for done descriptors when the
242          * number of available descriptors drops below tx_free_thresh.  For
243          * each done descriptor, free the associated buffer.
244          */
245         if (txq->nb_tx_free < txq->tx_free_thresh)
246                 ixgbe_tx_free_bufs(txq);
247
248         /* Only use descriptors that are available */
249         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250         if (unlikely(nb_pkts == 0))
251                 return 0;
252
253         /* Use exactly nb_pkts descriptors */
254         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
255
256         /*
257          * At this point, we know there are enough descriptors in the
258          * ring to transmit all the packets.  This assumes that each
259          * mbuf contains a single segment, and that no new offloads
260          * are expected, which would require a new context descriptor.
261          */
262
263         /*
264          * See if we're going to wrap-around. If so, handle the top
265          * of the descriptor ring first, then do the bottom.  If not,
266          * the processing looks just like the "bottom" part anyway...
267          */
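        /*
         * Illustrative example: with nb_tx_desc = 512, tx_tail = 500 and
         * nb_pkts = 32, the first n = 12 packets fill descriptors 500..511,
         * tx_tail wraps to 0, and the remaining 20 packets start at
         * descriptor 0.
         */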
268         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
271
272                 /*
273                  * We know that the last descriptor in the ring will need to
274                  * have its RS bit set because tx_rs_thresh has to be
275                  * a divisor of the ring size
276                  */
277                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
280
281                 txq->tx_tail = 0;
282         }
283
284         /* Fill H/W descriptor ring with mbuf data */
285         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
287
288         /*
289          * Determine if RS bit should be set
290          * This is what we actually want:
291          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292          * but instead of subtracting 1 and doing >=, we can just do
293          * greater than without subtracting.
294          */
295         if (txq->tx_tail > txq->tx_next_rs) {
296                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
299                                                 txq->tx_rs_thresh);
300                 if (txq->tx_next_rs >= txq->nb_tx_desc)
301                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302         }
303
304         /*
305          * Check for wrap-around. This would only happen if we used
306          * up to the last descriptor in the ring, no more, no less.
307          */
308         if (txq->tx_tail >= txq->nb_tx_desc)
309                 txq->tx_tail = 0;
310
311         /* update tail pointer */
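        /*
         * The write barrier makes all descriptor writes above globally
         * visible before the tail doorbell update lets hardware fetch them.
         */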
312         rte_wmb();
313         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
314
315         return nb_pkts;
316 }
317
318 uint16_t
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
320                        uint16_t nb_pkts)
321 {
322         uint16_t nb_tx;
323
324         /* Common case: the whole burst fits within TX_MAX_BURST pkts */
325         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
327
328         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
329         nb_tx = 0;
330         while (nb_pkts) {
331                 uint16_t ret, n;
332
333                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335                 nb_tx = (uint16_t)(nb_tx + ret);
336                 nb_pkts = (uint16_t)(nb_pkts - ret);
337                 if (ret < n)
338                         break;
339         }
340
341         return nb_tx;
342 }
343
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN_PKT) {
388                 tx_offload_mask.vlan_tci |= ~0;
389         }
390
391         /* check if TCP segmentation is required for this packet */
392         if (ol_flags & PKT_TX_TCP_SEG) {
393                 /* implies IP cksum in IPv4 */
394                 if (ol_flags & PKT_TX_IP_CKSUM)
395                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
398                 else
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
402
403                 tx_offload_mask.l2_len |= ~0;
404                 tx_offload_mask.l3_len |= ~0;
405                 tx_offload_mask.l4_len |= ~0;
406                 tx_offload_mask.tso_segsz |= ~0;
407                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409         } else { /* no TSO, check if hardware checksum is needed */
410                 if (ol_flags & PKT_TX_IP_CKSUM) {
411                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                 }
415
416                 switch (ol_flags & PKT_TX_L4_MASK) {
417                 case PKT_TX_UDP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
430                         tx_offload_mask.l2_len |= ~0;
431                         tx_offload_mask.l3_len |= ~0;
432                         break;
433                 case PKT_TX_SCTP_CKSUM:
434                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
438                         tx_offload_mask.l2_len |= ~0;
439                         tx_offload_mask.l3_len |= ~0;
440                         break;
441                 default:
442                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
444                         break;
445                 }
446         }
447
448         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449                 tx_offload_mask.outer_l2_len |= ~0;
450                 tx_offload_mask.outer_l3_len |= ~0;
451                 tx_offload_mask.l2_len |= ~0;
452                 seqnum_seed |= tx_offload.outer_l3_len
453                                << IXGBE_ADVTXD_OUTER_IPLEN;
454                 seqnum_seed |= tx_offload.l2_len
455                                << IXGBE_ADVTXD_TUNNEL_LEN;
456         }
457 #ifdef RTE_LIB_SECURITY
458         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459                 union ixgbe_crypto_tx_desc_md *md =
460                                 (union ixgbe_crypto_tx_desc_md *)mdata;
461                 seqnum_seed |=
462                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463                 type_tucmd_mlhl |= md->enc ?
464                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
466                 type_tucmd_mlhl |=
467                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468                 tx_offload_mask.sa_idx |= ~0;
469                 tx_offload_mask.sec_pad_len |= ~0;
470         }
471 #endif
472
473         txq->ctx_cache[ctx_idx].flags = ol_flags;
474         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
475                 tx_offload_mask.data[0] & tx_offload.data[0];
476         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
477                 tx_offload_mask.data[1] & tx_offload.data[1];
478         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
479
480         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481         vlan_macip_lens = tx_offload.l3_len;
482         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484                                     IXGBE_ADVTXD_MACLEN_SHIFT);
485         else
486                 vlan_macip_lens |= (tx_offload.l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
491         ctx_txd->seqnum_seed     = seqnum_seed;
492 }
493
494 /*
495  * Check which hardware context can be used. Use the existing match
496  * or create a new context descriptor.
497  */
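/*
 * The queue caches recently used contexts (ctx_cache[], indexed by ctx_curr
 * and toggled with "^= 1", i.e. two entries). Returning IXGBE_CTX_NUM signals
 * that neither cached entry matches and a new context descriptor has to be
 * written by the caller.
 */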
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500                    union ixgbe_tx_offload tx_offload)
501 {
502         /* If the flags and offloads match the currently used context */
503         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506                      & tx_offload.data[0])) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509                      & tx_offload.data[1]))))
510                 return txq->ctx_curr;
511
512         /* Otherwise, check whether the other cached context matches */
513         txq->ctx_curr ^= 1;
514         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517                      & tx_offload.data[0])) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520                      & tx_offload.data[1]))))
521                 return txq->ctx_curr;
522
523         /* Neither cached context matches: a new context descriptor is needed */
524         return IXGBE_CTX_NUM;
525 }
526
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
529 {
530         uint32_t tmp = 0;
531
532         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534         if (ol_flags & PKT_TX_IP_CKSUM)
535                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
536         if (ol_flags & PKT_TX_TCP_SEG)
537                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
538         return tmp;
539 }
540
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
543 {
544         uint32_t cmdtype = 0;
545
546         if (ol_flags & PKT_TX_VLAN_PKT)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548         if (ol_flags & PKT_TX_TCP_SEG)
549                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552         if (ol_flags & PKT_TX_MACSEC)
553                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
554         return cmdtype;
555 }
556
557 /* Default RS bit threshold values */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH   32
560 #endif
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
563 #endif
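/*
 * tx_rs_thresh controls how often the RS (Report Status) bit is set: the NIC
 * writes back completion status only for descriptors carrying RS, so larger
 * values mean fewer PCIe write-backs but mbufs are held longer before being
 * freed. tx_free_thresh is the free-descriptor level below which the transmit
 * paths start cleaning the ring (see tx_xmit_pkts() and ixgbe_xmit_pkts()).
 */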
564
565 /* Reset transmit descriptors after they have been used */
566 static inline int
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
568 {
569         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572         uint16_t nb_tx_desc = txq->nb_tx_desc;
573         uint16_t desc_to_clean_to;
574         uint16_t nb_tx_to_clean;
575         uint32_t status;
576
577         /* Determine the last descriptor needing to be cleaned */
578         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579         if (desc_to_clean_to >= nb_tx_desc)
580                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
581
582         /* Check to make sure the last descriptor to clean is done */
583         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584         status = txr[desc_to_clean_to].wb.status;
585         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
586                 PMD_TX_FREE_LOG(DEBUG,
587                                 "TX descriptor %4u is not done"
588                                 "(port=%d queue=%d)",
589                                 desc_to_clean_to,
590                                 txq->port_id, txq->queue_id);
591                 /* Failed to clean any descriptors, better luck next time */
592                 return -(1);
593         }
594
595         /* Figure out how many descriptors will be cleaned */
596         if (last_desc_cleaned > desc_to_clean_to)
597                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
598                                                         desc_to_clean_to);
599         else
600                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
601                                                 last_desc_cleaned);
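        /*
         * Illustrative example (ignoring the last_id adjustment below): with
         * nb_tx_desc = 512, last_desc_cleaned = 500 and tx_rs_thresh = 32,
         * desc_to_clean_to wraps to 20 and
         * nb_tx_to_clean = (512 - 500) + 20 = 32.
         */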
602
603         PMD_TX_FREE_LOG(DEBUG,
604                         "Cleaning %4u TX descriptors: %4u to %4u "
605                         "(port=%d queue=%d)",
606                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607                         txq->port_id, txq->queue_id);
608
609         /*
610          * The last descriptor to clean is done, so that means all the
611          * descriptors from the last descriptor that was cleaned
612          * up to the last descriptor with the RS bit set
613          * are done. Only reset the threshold descriptor.
614          */
615         txr[desc_to_clean_to].wb.status = 0;
616
617         /* Update the txq to reflect the last descriptor that was cleaned */
618         txq->last_desc_cleaned = desc_to_clean_to;
619         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
620
621         /* No Error */
622         return 0;
623 }
624
625 uint16_t
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
627                 uint16_t nb_pkts)
628 {
629         struct ixgbe_tx_queue *txq;
630         struct ixgbe_tx_entry *sw_ring;
631         struct ixgbe_tx_entry *txe, *txn;
632         volatile union ixgbe_adv_tx_desc *txr;
633         volatile union ixgbe_adv_tx_desc *txd, *txp;
634         struct rte_mbuf     *tx_pkt;
635         struct rte_mbuf     *m_seg;
636         uint64_t buf_dma_addr;
637         uint32_t olinfo_status;
638         uint32_t cmd_type_len;
639         uint32_t pkt_len;
640         uint16_t slen;
641         uint64_t ol_flags;
642         uint16_t tx_id;
643         uint16_t tx_last;
644         uint16_t nb_tx;
645         uint16_t nb_used;
646         uint64_t tx_ol_req;
647         uint32_t ctx = 0;
648         uint32_t new_ctx;
649         union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIB_SECURITY
651         uint8_t use_ipsec;
652 #endif
653
654         tx_offload.data[0] = 0;
655         tx_offload.data[1] = 0;
656         txq = tx_queue;
657         sw_ring = txq->sw_ring;
658         txr     = txq->tx_ring;
659         tx_id   = txq->tx_tail;
660         txe = &sw_ring[tx_id];
661         txp = NULL;
662
663         /* Determine if the descriptor ring needs to be cleaned. */
664         if (txq->nb_tx_free < txq->tx_free_thresh)
665                 ixgbe_xmit_cleanup(txq);
666
667         rte_prefetch0(&txe->mbuf->pool);
668
669         /* TX loop */
670         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
671                 new_ctx = 0;
672                 tx_pkt = *tx_pkts++;
673                 pkt_len = tx_pkt->pkt_len;
674
675                 /*
676                  * Determine how many (if any) context descriptors
677                  * are needed for offload functionality.
678                  */
679                 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIB_SECURITY
681                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
682 #endif
683
684                 /* If hardware offload is required */
685                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
686                 if (tx_ol_req) {
687                         tx_offload.l2_len = tx_pkt->l2_len;
688                         tx_offload.l3_len = tx_pkt->l3_len;
689                         tx_offload.l4_len = tx_pkt->l4_len;
690                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
691                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
692                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIB_SECURITY
695                         if (use_ipsec) {
696                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697                                         (union ixgbe_crypto_tx_desc_md *)
698                                                 rte_security_dynfield(tx_pkt);
699                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
701                         }
702 #endif
703
704                         /* Decide whether a new context descriptor must be built or an existing one reused. */
705                         ctx = what_advctx_update(txq, tx_ol_req,
706                                 tx_offload);
707                         /* Only allocate a context descriptor if required */
708                         new_ctx = (ctx == IXGBE_CTX_NUM);
709                         ctx = txq->ctx_curr;
710                 }
711
712                 /*
713                  * Keep track of how many descriptors are used this loop.
714                  * This is always the number of segments plus the number of
715                  * context descriptors (0 or 1) required to transmit the packet.
716                  */
717                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
718
719                 if (txp != NULL &&
720                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721                         /* set RS on the previous packet in the burst */
722                         txp->read.cmd_type_len |=
723                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
724
725                 /*
726                  * The number of descriptors that must be allocated for a
727                  * packet is the number of segments of that packet, plus 1
728                  * Context Descriptor for the hardware offload, if any.
729                  * Determine the last TX descriptor to allocate in the TX ring
730                  * for the packet, starting from the current position (tx_id)
731                  * in the ring.
732                  */
733                 tx_last = (uint16_t) (tx_id + nb_used - 1);
734
735                 /* Circular ring */
736                 if (tx_last >= txq->nb_tx_desc)
737                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
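                /*
                 * Illustrative example: with nb_tx_desc = 512, tx_id = 510
                 * and nb_used = 4, tx_last = 513 wraps to descriptor 1.
                 */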
738
739                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740                            " tx_first=%u tx_last=%u",
741                            (unsigned) txq->port_id,
742                            (unsigned) txq->queue_id,
743                            (unsigned) pkt_len,
744                            (unsigned) tx_id,
745                            (unsigned) tx_last);
746
747                 /*
748                  * Make sure there are enough TX descriptors available to
749                  * transmit the entire packet.
750                  * nb_used better be less than or equal to txq->tx_rs_thresh
751                  */
752                 if (nb_used > txq->nb_tx_free) {
753                         PMD_TX_FREE_LOG(DEBUG,
754                                         "Not enough free TX descriptors "
755                                         "nb_used=%4u nb_free=%4u "
756                                         "(port=%d queue=%d)",
757                                         nb_used, txq->nb_tx_free,
758                                         txq->port_id, txq->queue_id);
759
760                         if (ixgbe_xmit_cleanup(txq) != 0) {
761                                 /* Could not clean any descriptors */
762                                 if (nb_tx == 0)
763                                         return 0;
764                                 goto end_of_tx;
765                         }
766
767                         /* nb_used better be <= txq->tx_rs_thresh */
768                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
769                                 PMD_TX_FREE_LOG(DEBUG,
770                                         "The number of descriptors needed to "
771                                         "transmit the packet exceeds the "
772                                         "RS bit threshold. This will impact "
773                                         "performance."
774                                         "nb_used=%4u nb_free=%4u "
775                                         "tx_rs_thresh=%4u. "
776                                         "(port=%d queue=%d)",
777                                         nb_used, txq->nb_tx_free,
778                                         txq->tx_rs_thresh,
779                                         txq->port_id, txq->queue_id);
780                                 /*
781                                  * Loop here until there are enough TX
782                                  * descriptors or until the ring cannot be
783                                  * cleaned.
784                                  */
785                                 while (nb_used > txq->nb_tx_free) {
786                                         if (ixgbe_xmit_cleanup(txq) != 0) {
787                                                 /*
788                                                  * Could not clean any
789                                                  * descriptors
790                                                  */
791                                                 if (nb_tx == 0)
792                                                         return 0;
793                                                 goto end_of_tx;
794                                         }
795                                 }
796                         }
797                 }
798
799                 /*
800                  * By now there are enough free TX descriptors to transmit
801                  * the packet.
802                  */
803
804                 /*
805                  * Set common flags of all TX Data Descriptors.
806                  *
807                  * The following bits must be set in all Data Descriptors:
808                  *   - IXGBE_ADVTXD_DTYP_DATA
809                  *   - IXGBE_ADVTXD_DCMD_DEXT
810                  *
811                  * The following bits must be set in the first Data Descriptor
812                  * and are ignored in the other ones:
813                  *   - IXGBE_ADVTXD_DCMD_IFCS
814                  *   - IXGBE_ADVTXD_MAC_1588
815                  *   - IXGBE_ADVTXD_DCMD_VLE
816                  *
817                  * The following bits must only be set in the last Data
818                  * Descriptor:
819                  *   - IXGBE_TXD_CMD_EOP
820                  *
821                  * The following bits can be set in any Data Descriptor, but
822                  * are only set in the last Data Descriptor:
823                  *   - IXGBE_TXD_CMD_RS
824                  */
825                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
827
828 #ifdef RTE_LIBRTE_IEEE1588
829                 if (ol_flags & PKT_TX_IEEE1588_TMST)
830                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
831 #endif
832
833                 olinfo_status = 0;
834                 if (tx_ol_req) {
835
836                         if (ol_flags & PKT_TX_TCP_SEG) {
837                                 /* when TSO is on, paylen in the descriptor is
838                                  * not the packet len but the TCP payload len */
839                                 pkt_len -= (tx_offload.l2_len +
840                                         tx_offload.l3_len + tx_offload.l4_len);
841                         }
842
843                         /*
844                          * Setup the TX Advanced Context Descriptor if required
845                          */
846                         if (new_ctx) {
847                                 volatile struct ixgbe_adv_tx_context_desc *
848                                     ctx_txd;
849
850                                 ctx_txd = (volatile struct
851                                     ixgbe_adv_tx_context_desc *)
852                                     &txr[tx_id];
853
854                                 txn = &sw_ring[txe->next_id];
855                                 rte_prefetch0(&txn->mbuf->pool);
856
857                                 if (txe->mbuf != NULL) {
858                                         rte_pktmbuf_free_seg(txe->mbuf);
859                                         txe->mbuf = NULL;
860                                 }
861
862                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
863                                         tx_offload,
864                                         rte_security_dynfield(tx_pkt));
865
866                                 txe->last_id = tx_last;
867                                 tx_id = txe->next_id;
868                                 txe = txn;
869                         }
870
871                         /*
872                          * Set up the TX Advanced Data Descriptor.
873                          * This path is taken whether a new context descriptor
874                          * was built or an existing one is reused.
875                          */
876                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
877                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
878                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
879                 }
880
881                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
882 #ifdef RTE_LIB_SECURITY
883                 if (use_ipsec)
884                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
885 #endif
886
887                 m_seg = tx_pkt;
888                 do {
889                         txd = &txr[tx_id];
890                         txn = &sw_ring[txe->next_id];
891                         rte_prefetch0(&txn->mbuf->pool);
892
893                         if (txe->mbuf != NULL)
894                                 rte_pktmbuf_free_seg(txe->mbuf);
895                         txe->mbuf = m_seg;
896
897                         /*
898                          * Set up Transmit Data Descriptor.
899                          */
900                         slen = m_seg->data_len;
901                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
902                         txd->read.buffer_addr =
903                                 rte_cpu_to_le_64(buf_dma_addr);
904                         txd->read.cmd_type_len =
905                                 rte_cpu_to_le_32(cmd_type_len | slen);
906                         txd->read.olinfo_status =
907                                 rte_cpu_to_le_32(olinfo_status);
908                         txe->last_id = tx_last;
909                         tx_id = txe->next_id;
910                         txe = txn;
911                         m_seg = m_seg->next;
912                 } while (m_seg != NULL);
913
914                 /*
915                  * The last packet data descriptor needs End Of Packet (EOP)
916                  */
917                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
918                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
919                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
920
921                 /* Set RS bit only on threshold packets' last descriptor */
922                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
923                         PMD_TX_FREE_LOG(DEBUG,
924                                         "Setting RS bit on TXD id="
925                                         "%4u (port=%d queue=%d)",
926                                         tx_last, txq->port_id, txq->queue_id);
927
928                         cmd_type_len |= IXGBE_TXD_CMD_RS;
929
930                         /* Update txq RS bit counters */
931                         txq->nb_tx_used = 0;
932                         txp = NULL;
933                 } else
934                         txp = txd;
935
936                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
937         }
938
939 end_of_tx:
940         /* set RS on last packet in the burst */
941         if (txp != NULL)
942                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
943
944         rte_wmb();
945
946         /*
947          * Set the Transmit Descriptor Tail (TDT)
948          */
949         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
950                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
951                    (unsigned) tx_id, (unsigned) nb_tx);
952         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
953         txq->tx_tail = tx_id;
954
955         return nb_tx;
956 }
957
958 /*********************************************************************
959  *
960  *  TX prep functions
961  *
962  **********************************************************************/
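/*
 * ixgbe_prep_pkts() is registered as the device's tx_pkt_prepare handler, so
 * applications normally run it through rte_eth_tx_prepare() on a burst before
 * passing the burst to the transmit function; packets that request
 * unsupported offloads or violate descriptor limits are rejected up front.
 */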
963 uint16_t
964 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
965 {
966         int i, ret;
967         uint64_t ol_flags;
968         struct rte_mbuf *m;
969         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
970
971         for (i = 0; i < nb_pkts; i++) {
972                 m = tx_pkts[i];
973                 ol_flags = m->ol_flags;
974
975                 /**
976                  * Check if packet meets requirements for number of segments
977                  *
978                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
979                  *       non-TSO
980                  */
981
982                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
983                         rte_errno = EINVAL;
984                         return i;
985                 }
986
987                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
988                         rte_errno = ENOTSUP;
989                         return i;
990                 }
991
992                 /* check the size of packet */
993                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
994                         rte_errno = EINVAL;
995                         return i;
996                 }
997
998 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
999                 ret = rte_validate_tx_offload(m);
1000                 if (ret != 0) {
1001                         rte_errno = -ret;
1002                         return i;
1003                 }
1004 #endif
1005                 ret = rte_net_intel_cksum_prepare(m);
1006                 if (ret != 0) {
1007                         rte_errno = -ret;
1008                         return i;
1009                 }
1010         }
1011
1012         return i;
1013 }
1014
1015 /*********************************************************************
1016  *
1017  *  RX functions
1018  *
1019  **********************************************************************/
1020
1021 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1022 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1023 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1024 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1025 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1030 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1031 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1032 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1033 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1037 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1054
1055 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1078
1079 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1102
1103 /**
1104  * Use two different tables for normal packets and tunnel packets
1105  * to save space.
1106  */
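/*
 * The hex values defined above encode the L3/L4 combination reported in the
 * RX descriptor's packet-type field (e.g. 0x11 = IPv4 + TCP); a masked form
 * of that field is used as the index into these tables. Tunnelled
 * (NVGRE/VXLAN) packets are looked up in ptype_table_tn instead of
 * ptype_table.
 */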
1107 const uint32_t
1108         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1109         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1110         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4,
1112         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1114         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1116         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1124         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1126         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6,
1128         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1130         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1132         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1140         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1151         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1163         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1175         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1187         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1188                 RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1191 };
1192
1193 const uint32_t
1194         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1195         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1216         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1217                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1219                 RTE_PTYPE_INNER_L4_TCP,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1223                 RTE_PTYPE_INNER_L4_TCP,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1230                 RTE_PTYPE_INNER_L4_TCP,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1232                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1233                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1234                 RTE_PTYPE_INNER_L3_IPV4,
1235         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1236                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1238                 RTE_PTYPE_INNER_L4_UDP,
1239         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1242                 RTE_PTYPE_INNER_L4_UDP,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1246                 RTE_PTYPE_INNER_L4_SCTP,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1253                 RTE_PTYPE_INNER_L4_UDP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1255                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1257                 RTE_PTYPE_INNER_L4_SCTP,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1259                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1260                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1261                 RTE_PTYPE_INNER_L3_IPV4,
1262         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1263                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1265                 RTE_PTYPE_INNER_L4_SCTP,
1266         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                 RTE_PTYPE_INNER_L4_SCTP,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1273                 RTE_PTYPE_INNER_L4_TCP,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1277                 RTE_PTYPE_INNER_L4_UDP,
1278
1279         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV6,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV4,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV4,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1323                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1324                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1325                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1351                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1352                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1353                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1370 };
1371
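     /*
      * Implement the ethdev .get_monitor_addr callback for the power
      * management API: report the address, expected value and mask of the
      * DD bit of the next descriptor to be written back, so an lcore can
      * sleep on it until traffic arrives on this Rx queue.
      *
      * A minimal usage sketch (port_id/queue_id/timeout_cycles are
      * hypothetical values chosen by the application):
      *     struct rte_power_monitor_cond pmc;
      *     rte_eth_get_monitor_addr(port_id, queue_id, &pmc);
      *     rte_power_monitor(&pmc, rte_get_tsc_cycles() + timeout_cycles);
      */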
1372 int
1373 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1374 {
1375         volatile union ixgbe_adv_rx_desc *rxdp;
1376         struct ixgbe_rx_queue *rxq = rx_queue;
1377         uint16_t desc;
1378
1379         desc = rxq->rx_tail;
1380         rxdp = &rxq->rx_ring[desc];
1381         /* watch for changes in status bit */
1382         pmc->addr = &rxdp->wb.upper.status_error;
1383
1384         /*
1385          * we expect the DD bit to be set to 1 if this descriptor was already
1386          * written to.
1387          */
1388         pmc->val = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1389         pmc->mask = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1390
1391         /* the monitored status_error field is 32 bits wide */
1392         pmc->data_sz = sizeof(uint32_t);
1393
1394         return 0;
1395 }
1396
1397 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1398 static inline uint32_t
1399 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1400 {
1401
1402         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1403                 return RTE_PTYPE_UNKNOWN;
1404
1405         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1406
1407         /* For tunneled packets */
1408         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1409                 /* Remove the tunnel bit to save space in the table. */
1410                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1411                 return ptype_table_tn[pkt_info];
1412         }
1413
1414         /**
1415          * For x550, if the packet is not tunneled, the tunnel
1416          * type bits are expected to be 0.
1417          * Reuse the 82599 mask here.
1418          */
1419         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1420
1421         return ptype_table[pkt_info];
1422 }
1423
1424 static inline uint64_t
1425 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1426 {
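             /*
              * The low 4 bits of pkt_info encode the descriptor RSS-type field:
              * recognised RSS hash types map to PKT_RX_RSS_HASH and value 15 is
              * treated as a flow-director report (PKT_RX_FDIR).
              */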
1427         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1428                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1429                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1430                 PKT_RX_RSS_HASH, 0, 0, 0,
1431                 0, 0, 0,  PKT_RX_FDIR,
1432         };
1433 #ifdef RTE_LIBRTE_IEEE1588
1434         static uint64_t ip_pkt_etqf_map[8] = {
1435                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1436                 0, 0, 0, 0,
1437         };
1438
1439         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1440                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1441                                 ip_rss_types_map[pkt_info & 0XF];
1442         else
1443                 return ip_rss_types_map[pkt_info & 0XF];
1444 #else
1445         return ip_rss_types_map[pkt_info & 0XF];
1446 #endif
1447 }
1448
1449 static inline uint64_t
1450 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1451 {
1452         uint64_t pkt_flags;
1453
1454         /*
1455          * Only check whether a VLAN tag is present.
1456          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1457          * that can be determined from the rte_eth_rxmode.offloads flags.
1458          */
1459         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1460
1461 #ifdef RTE_LIBRTE_IEEE1588
1462         if (rx_status & IXGBE_RXD_STAT_TMST)
1463                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1464 #endif
1465         return pkt_flags;
1466 }
1467
1468 static inline uint64_t
1469 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1470 {
1471         uint64_t pkt_flags;
1472
1473         /*
1474          * Bit 31: IPE, IPv4 checksum error
1475          * Bit 30: L4I, L4 integrity error
1476          */
1477         static uint64_t error_to_pkt_flags_map[4] = {
1478                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1479                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1480                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1481                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1482         };
1483         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1484                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1485
1486         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1487             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1488                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1489         }
1490
1491 #ifdef RTE_LIB_SECURITY
1492         if (rx_status & IXGBE_RXD_STAT_SECP) {
1493                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1494                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1495                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1496         }
1497 #endif
1498
1499         return pkt_flags;
1500 }
1501
1502 /*
1503  * LOOK_AHEAD defines how many desc statuses to check beyond the
1504  * current descriptor.
1505  * It must be a compile-time constant (a #define) for optimal performance.
1506  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1507  * function only works with LOOK_AHEAD=8.
1508  */
1509 #define LOOK_AHEAD 8
1510 #if (LOOK_AHEAD != 8)
1511 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1512 #endif
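     /*
      * Scan up to RTE_PMD_IXGBE_RX_MAX_BURST descriptors in groups of
      * LOOK_AHEAD, convert the completed ones to mbufs and stage them for
      * ixgbe_rx_fill_from_stage().
      */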
1513 static inline int
1514 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1515 {
1516         volatile union ixgbe_adv_rx_desc *rxdp;
1517         struct ixgbe_rx_entry *rxep;
1518         struct rte_mbuf *mb;
1519         uint16_t pkt_len;
1520         uint64_t pkt_flags;
1521         int nb_dd;
1522         uint32_t s[LOOK_AHEAD];
1523         uint32_t pkt_info[LOOK_AHEAD];
1524         int i, j, nb_rx = 0;
1525         uint32_t status;
1526         uint64_t vlan_flags = rxq->vlan_flags;
1527
1528         /* get references to current descriptor and S/W ring entry */
1529         rxdp = &rxq->rx_ring[rxq->rx_tail];
1530         rxep = &rxq->sw_ring[rxq->rx_tail];
1531
1532         status = rxdp->wb.upper.status_error;
1533         /* check to make sure there is at least 1 packet to receive */
1534         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1535                 return 0;
1536
1537         /*
1538          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1539          * reference packets that are ready to be received.
1540          */
1541         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1542              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1543                 /* Read desc statuses backwards to avoid race condition */
1544                 for (j = 0; j < LOOK_AHEAD; j++)
1545                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1546
1547                 rte_smp_rmb();
1548
1549                 /* Count how many contiguous descriptors have the DD bit set */
1550                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1551                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1552                         ;
1553
1554                 for (j = 0; j < nb_dd; j++)
1555                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1556                                                        lo_dword.data);
1557
1558                 nb_rx += nb_dd;
1559
1560                 /* Translate descriptor info to mbuf format */
1561                 for (j = 0; j < nb_dd; ++j) {
1562                         mb = rxep[j].mbuf;
1563                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1564                                   rxq->crc_len;
1565                         mb->data_len = pkt_len;
1566                         mb->pkt_len = pkt_len;
1567                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1568
1569                         /* convert descriptor fields to rte mbuf flags */
1570                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1571                                 vlan_flags);
1572                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1573                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1574                                         ((uint16_t)pkt_info[j]);
1575                         mb->ol_flags = pkt_flags;
1576                         mb->packet_type =
1577                                 ixgbe_rxd_pkt_info_to_pkt_type
1578                                         (pkt_info[j], rxq->pkt_type_mask);
1579
1580                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1581                                 mb->hash.rss = rte_le_to_cpu_32(
1582                                     rxdp[j].wb.lower.hi_dword.rss);
1583                         else if (pkt_flags & PKT_RX_FDIR) {
1584                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1585                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1586                                     IXGBE_ATR_HASH_MASK;
1587                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1588                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1589                         }
1590                 }
1591
1592                 /* Move mbuf pointers from the S/W ring to the stage */
1593                 for (j = 0; j < LOOK_AHEAD; ++j) {
1594                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1595                 }
1596
1597                 /* stop if not all descriptors in this group were completed */
1598                 if (nb_dd != LOOK_AHEAD)
1599                         break;
1600         }
1601
1602         /* clear software ring entries so we can cleanup correctly */
1603         for (i = 0; i < nb_rx; ++i) {
1604                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1605         }
1606
1607
1608         return nb_rx;
1609 }
1610
1611 static inline int
1612 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1613 {
1614         volatile union ixgbe_adv_rx_desc *rxdp;
1615         struct ixgbe_rx_entry *rxep;
1616         struct rte_mbuf *mb;
1617         uint16_t alloc_idx;
1618         __le64 dma_addr;
1619         int diag, i;
1620
1621         /* allocate buffers in bulk directly into the S/W ring */
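             /*
              * rx_free_trigger is the index of the last descriptor in the
              * current threshold-sized block, so the block to refill starts
              * rx_free_thresh - 1 entries earlier.
              */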
1622         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1623         rxep = &rxq->sw_ring[alloc_idx];
1624         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1625                                     rxq->rx_free_thresh);
1626         if (unlikely(diag != 0))
1627                 return -ENOMEM;
1628
1629         rxdp = &rxq->rx_ring[alloc_idx];
1630         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1631                 /* populate the static rte mbuf fields */
1632                 mb = rxep[i].mbuf;
1633                 if (reset_mbuf) {
1634                         mb->port = rxq->port_id;
1635                 }
1636
1637                 rte_mbuf_refcnt_set(mb, 1);
1638                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1639
1640                 /* populate the descriptors */
1641                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1642                 rxdp[i].read.hdr_addr = 0;
1643                 rxdp[i].read.pkt_addr = dma_addr;
1644         }
1645
1646         /* update state of internal queue structure */
1647         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1648         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1649                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1650
1651         /* no errors */
1652         return 0;
1653 }
1654
1655 static inline uint16_t
1656 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1657                          uint16_t nb_pkts)
1658 {
1659         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1660         int i;
1661
1662         /* how many packets are ready to return? */
1663         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1664
1665         /* copy mbuf pointers to the application's packet list */
1666         for (i = 0; i < nb_pkts; ++i)
1667                 rx_pkts[i] = stage[i];
1668
1669         /* update internal queue state */
1670         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1671         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1672
1673         return nb_pkts;
1674 }
1675
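     /*
      * Bulk-allocation receive path: return staged packets if any are left
      * over, otherwise scan the HW ring, refill descriptors once the free
      * trigger has been crossed and return up to RTE_PMD_IXGBE_RX_MAX_BURST
      * packets.
      */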
1676 static inline uint16_t
1677 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1678              uint16_t nb_pkts)
1679 {
1680         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1681         uint16_t nb_rx = 0;
1682
1683         /* Any previously recv'd pkts will be returned from the Rx stage */
1684         if (rxq->rx_nb_avail)
1685                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1686
1687         /* Scan the H/W ring for packets to receive */
1688         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1689
1690         /* update internal queue state */
1691         rxq->rx_next_avail = 0;
1692         rxq->rx_nb_avail = nb_rx;
1693         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1694
1695         /* if required, allocate new buffers to replenish descriptors */
1696         if (rxq->rx_tail > rxq->rx_free_trigger) {
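                     /*
                      * ixgbe_rx_alloc_bufs() advances rx_free_trigger, so keep
                      * the pre-refill value: it is the RDT value written below.
                      */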
1697                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1698
1699                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1700                         int i, j;
1701
1702                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1703                                    "queue_id=%u", (unsigned) rxq->port_id,
1704                                    (unsigned) rxq->queue_id);
1705
1706                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1707                                 rxq->rx_free_thresh;
1708
1709                         /*
1710                          * Need to rewind any previous receives if we cannot
1711                          * allocate new buffers to replenish the old ones.
1712                          */
1713                         rxq->rx_nb_avail = 0;
1714                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1715                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1716                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1717
1718                         return 0;
1719                 }
1720
1721                 /* update tail pointer */
1722                 rte_wmb();
1723                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1724                                             cur_free_trigger);
1725         }
1726
1727         if (rxq->rx_tail >= rxq->nb_rx_desc)
1728                 rxq->rx_tail = 0;
1729
1730         /* received any packets this loop? */
1731         if (rxq->rx_nb_avail)
1732                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1733
1734         return 0;
1735 }
1736
1737 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1738 uint16_t
1739 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1740                            uint16_t nb_pkts)
1741 {
1742         uint16_t nb_rx;
1743
1744         if (unlikely(nb_pkts == 0))
1745                 return 0;
1746
1747         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1748                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1749
1750         /* request is relatively large, chunk it up */
1751         nb_rx = 0;
1752         while (nb_pkts) {
1753                 uint16_t ret, n;
1754
1755                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1756                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1757                 nb_rx = (uint16_t)(nb_rx + ret);
1758                 nb_pkts = (uint16_t)(nb_pkts - ret);
1759                 if (ret < n)
1760                         break;
1761         }
1762
1763         return nb_rx;
1764 }
1765
1766 uint16_t
1767 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1768                 uint16_t nb_pkts)
1769 {
1770         struct ixgbe_rx_queue *rxq;
1771         volatile union ixgbe_adv_rx_desc *rx_ring;
1772         volatile union ixgbe_adv_rx_desc *rxdp;
1773         struct ixgbe_rx_entry *sw_ring;
1774         struct ixgbe_rx_entry *rxe;
1775         struct rte_mbuf *rxm;
1776         struct rte_mbuf *nmb;
1777         union ixgbe_adv_rx_desc rxd;
1778         uint64_t dma_addr;
1779         uint32_t staterr;
1780         uint32_t pkt_info;
1781         uint16_t pkt_len;
1782         uint16_t rx_id;
1783         uint16_t nb_rx;
1784         uint16_t nb_hold;
1785         uint64_t pkt_flags;
1786         uint64_t vlan_flags;
1787
1788         nb_rx = 0;
1789         nb_hold = 0;
1790         rxq = rx_queue;
1791         rx_id = rxq->rx_tail;
1792         rx_ring = rxq->rx_ring;
1793         sw_ring = rxq->sw_ring;
1794         vlan_flags = rxq->vlan_flags;
1795         while (nb_rx < nb_pkts) {
1796                 /*
1797                  * The order of operations here is important as the DD status
1798                  * bit must not be read after any other descriptor fields.
1799                  * rx_ring and rxdp are pointing to volatile data so the order
1800                  * of accesses cannot be reordered by the compiler. If they were
1801                  * not volatile, they could be reordered which could lead to
1802                  * using invalid descriptor fields when read from rxd.
1803                  */
1804                 rxdp = &rx_ring[rx_id];
1805                 staterr = rxdp->wb.upper.status_error;
1806                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1807                         break;
1808                 rxd = *rxdp;
1809
1810                 /*
1811                  * End of packet.
1812                  *
1813                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1814                  * is likely to be invalid and to be dropped by the various
1815                  * validation checks performed by the network stack.
1816                  *
1817                  * Allocate a new mbuf to replenish the RX ring descriptor.
1818                  * If the allocation fails:
1819                  *    - arrange for that RX descriptor to be the first one
1820                  *      being parsed the next time the receive function is
1821                  *      invoked [on the same queue].
1822                  *
1823                  *    - Stop parsing the RX ring and return immediately.
1824                  *
1825                  * This policy does not drop the packet received in the RX
1826                  * descriptor for which the allocation of a new mbuf failed.
1827                  * Thus, it allows that packet to be retrieved later, once
1828                  * mbufs have been freed in the meantime.
1829                  * As a side effect, holding RX descriptors instead of
1830                  * systematically giving them back to the NIC may lead to
1831                  * RX ring exhaustion situations.
1832                  * However, the NIC can gracefully prevent such situations
1833                  * from happening by sending specific "back-pressure" flow
1834                  * control frames to its peer(s).
1835                  */
1836                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1837                            "ext_err_stat=0x%08x pkt_len=%u",
1838                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1839                            (unsigned) rx_id, (unsigned) staterr,
1840                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1841
1842                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1843                 if (nmb == NULL) {
1844                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1845                                    "queue_id=%u", (unsigned) rxq->port_id,
1846                                    (unsigned) rxq->queue_id);
1847                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1848                         break;
1849                 }
1850
1851                 nb_hold++;
1852                 rxe = &sw_ring[rx_id];
1853                 rx_id++;
1854                 if (rx_id == rxq->nb_rx_desc)
1855                         rx_id = 0;
1856
1857                 /* Prefetch next mbuf while processing current one. */
1858                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1859
1860                 /*
1861                  * When next RX descriptor is on a cache-line boundary,
1862                  * prefetch the next 4 RX descriptors and the next 8 pointers
1863                  * to mbufs.
1864                  */
1865                 if ((rx_id & 0x3) == 0) {
1866                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1867                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1868                 }
1869
1870                 rxm = rxe->mbuf;
1871                 rxe->mbuf = nmb;
1872                 dma_addr =
1873                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1874                 rxdp->read.hdr_addr = 0;
1875                 rxdp->read.pkt_addr = dma_addr;
1876
1877                 /*
1878                  * Initialize the returned mbuf.
1879                  * 1) setup generic mbuf fields:
1880                  *    - number of segments,
1881                  *    - next segment,
1882                  *    - packet length,
1883                  *    - RX port identifier.
1884                  * 2) integrate hardware offload data, if any:
1885                  *    - RSS flag & hash,
1886                  *    - IP checksum flag,
1887                  *    - VLAN TCI, if any,
1888                  *    - error flags.
1889                  */
1890                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1891                                       rxq->crc_len);
1892                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1893                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1894                 rxm->nb_segs = 1;
1895                 rxm->next = NULL;
1896                 rxm->pkt_len = pkt_len;
1897                 rxm->data_len = pkt_len;
1898                 rxm->port = rxq->port_id;
1899
1900                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1901                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1902                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1903
1904                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1905                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1906                 pkt_flags = pkt_flags |
1907                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1908                 rxm->ol_flags = pkt_flags;
1909                 rxm->packet_type =
1910                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1911                                                        rxq->pkt_type_mask);
1912
1913                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1914                         rxm->hash.rss = rte_le_to_cpu_32(
1915                                                 rxd.wb.lower.hi_dword.rss);
1916                 else if (pkt_flags & PKT_RX_FDIR) {
1917                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1918                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1919                                         IXGBE_ATR_HASH_MASK;
1920                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1921                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1922                 }
1923                 /*
1924                  * Store the mbuf address into the next entry of the array
1925                  * of returned packets.
1926                  */
1927                 rx_pkts[nb_rx++] = rxm;
1928         }
1929         rxq->rx_tail = rx_id;
1930
1931         /*
1932          * If the number of free RX descriptors is greater than the RX free
1933          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1934          * register.
1935          * Update the RDT with the value of the last processed RX descriptor
1936          * minus 1, to guarantee that the RDT register is never equal to the
1937          * RDH register, which creates a "full" ring situation from the
1938          * hardware point of view...
1939          */
1940         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1941         if (nb_hold > rxq->rx_free_thresh) {
1942                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1943                            "nb_hold=%u nb_rx=%u",
1944                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1945                            (unsigned) rx_id, (unsigned) nb_hold,
1946                            (unsigned) nb_rx);
1947                 rx_id = (uint16_t) ((rx_id == 0) ?
1948                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1949                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1950                 nb_hold = 0;
1951         }
1952         rxq->nb_rx_hold = nb_hold;
1953         return nb_rx;
1954 }
1955
1956 /**
1957  * Return the RSC count of a descriptor; non-zero only for RSC descriptors.
1958  */
1959 static inline uint32_t
1960 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1961 {
1962         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1963                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1964 }
1965
1966 /**
1967  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1968  *
1969  * Fill the following info in the HEAD buffer of the Rx cluster:
1970  *    - RX port identifier
1971  *    - hardware offload data, if any:
1972  *      - RSS flag & hash
1973  *      - IP checksum flag
1974  *      - VLAN TCI, if any
1975  *      - error flags
1976  * @head HEAD of the packet cluster
1977  * @desc HW descriptor to get data from
1978  * @rxq Pointer to the Rx queue
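      * @staterr Status/error word taken from the descriptor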
1979  */
1980 static inline void
1981 ixgbe_fill_cluster_head_buf(
1982         struct rte_mbuf *head,
1983         union ixgbe_adv_rx_desc *desc,
1984         struct ixgbe_rx_queue *rxq,
1985         uint32_t staterr)
1986 {
1987         uint32_t pkt_info;
1988         uint64_t pkt_flags;
1989
1990         head->port = rxq->port_id;
1991
1992         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1993          * set in the pkt_flags field.
1994          */
1995         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1996         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1997         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1998         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1999         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2000         head->ol_flags = pkt_flags;
2001         head->packet_type =
2002                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2003
2004         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2005                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2006         else if (pkt_flags & PKT_RX_FDIR) {
2007                 head->hash.fdir.hash =
2008                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2009                                                           & IXGBE_ATR_HASH_MASK;
2010                 head->hash.fdir.id =
2011                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2012         }
2013 }
2014
2015 /**
2016  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2017  *
2018  * @rx_queue Rx queue handle
2019  * @rx_pkts table of received packets
2020  * @nb_pkts size of rx_pkts table
2021  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2022  *
2023  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2024  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
2025  *
2026  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2027  * 1) When non-EOP RSC completion arrives:
2028  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2029  *       segment's data length.
2030  *    b) Set the "next" pointer of the current segment to point to the segment
2031  *       at the NEXTP index.
2032  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2033  *       in the sw_sc_ring.
2034  * 2) When EOP arrives we just update the cluster's total length and offload
2035  *    flags and deliver the cluster up to the upper layers. In our case - put it
2036  *    in the rx_pkts table.
2037  *
2038  * Returns the number of received packets/clusters (according to the "bulk
2039  * receive" interface).
2040  */
2041 static inline uint16_t
2042 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2043                     bool bulk_alloc)
2044 {
2045         struct ixgbe_rx_queue *rxq = rx_queue;
2046         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2047         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2048         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2049         uint16_t rx_id = rxq->rx_tail;
2050         uint16_t nb_rx = 0;
2051         uint16_t nb_hold = rxq->nb_rx_hold;
2052         uint16_t prev_id = rxq->rx_tail;
2053
2054         while (nb_rx < nb_pkts) {
2055                 bool eop;
2056                 struct ixgbe_rx_entry *rxe;
2057                 struct ixgbe_scattered_rx_entry *sc_entry;
2058                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2059                 struct ixgbe_rx_entry *next_rxe = NULL;
2060                 struct rte_mbuf *first_seg;
2061                 struct rte_mbuf *rxm;
2062                 struct rte_mbuf *nmb = NULL;
2063                 union ixgbe_adv_rx_desc rxd;
2064                 uint16_t data_len;
2065                 uint16_t next_id;
2066                 volatile union ixgbe_adv_rx_desc *rxdp;
2067                 uint32_t staterr;
2068
2069 next_desc:
2070                 /*
2071                  * The code in this whole file uses the volatile pointer to
2072                  * ensure the read ordering of the status and the rest of the
2073                  * descriptor fields (on the compiler level only!!!). This is so
2074                  * UGLY - why not just use the compiler barrier instead? DPDK
2075                  * even has the rte_compiler_barrier() for that.
2076                  *
2077                  * But most importantly this is just wrong because this doesn't
2078                  * ensure memory ordering in a general case at all. For
2079                  * instance, DPDK is supposed to work on Power CPUs where
2080                  * compiler barrier may just not be enough!
2081                  *
2082                  * I tried to write only this function properly to have a
2083                  * starting point (as a part of an LRO/RSC series) but the
2084                  * compiler cursed at me when I tried to cast away the
2085                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2086                  * keeping it the way it is for now.
2087                  *
2088                  * The code in this file is broken in so many other places and
2089                  * will just not work on a big endian CPU anyway therefore the
2090                  * lines below will have to be revisited together with the rest
2091                  * of the ixgbe PMD.
2092                  *
2093                  * TODO:
2094                  *    - Get rid of "volatile" and let the compiler do its job.
2095                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2096                  *      memory ordering below.
2097                  */
2098                 rxdp = &rx_ring[rx_id];
2099                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2100
2101                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2102                         break;
2103
2104                 rxd = *rxdp;
2105
2106                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2107                                   "staterr=0x%x data_len=%u",
2108                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2109                            rte_le_to_cpu_16(rxd.wb.upper.length));
2110
2111                 if (!bulk_alloc) {
2112                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2113                         if (nmb == NULL) {
2114                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2115                                                   "port_id=%u queue_id=%u",
2116                                            rxq->port_id, rxq->queue_id);
2117
2118                                 rte_eth_devices[rxq->port_id].data->
2119                                                         rx_mbuf_alloc_failed++;
2120                                 break;
2121                         }
2122                 } else if (nb_hold > rxq->rx_free_thresh) {
2123                         uint16_t next_rdt = rxq->rx_free_trigger;
2124
2125                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2126                                 rte_wmb();
2127                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2128                                                         rxq->rdt_reg_addr,
2129                                                         next_rdt);
2130                                 nb_hold -= rxq->rx_free_thresh;
2131                         } else {
2132                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2133                                                   "port_id=%u queue_id=%u",
2134                                            rxq->port_id, rxq->queue_id);
2135
2136                                 rte_eth_devices[rxq->port_id].data->
2137                                                         rx_mbuf_alloc_failed++;
2138                                 break;
2139                         }
2140                 }
2141
2142                 nb_hold++;
2143                 rxe = &sw_ring[rx_id];
2144                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2145
2146                 next_id = rx_id + 1;
2147                 if (next_id == rxq->nb_rx_desc)
2148                         next_id = 0;
2149
2150                 /* Prefetch next mbuf while processing current one. */
2151                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2152
2153                 /*
2154                  * When next RX descriptor is on a cache-line boundary,
2155                  * prefetch the next 4 RX descriptors and the next 8 pointers
2156                  * to mbufs.
2157                  */
2158                 if ((next_id & 0x3) == 0) {
2159                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2160                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2161                 }
2162
2163                 rxm = rxe->mbuf;
2164
2165                 if (!bulk_alloc) {
2166                         __le64 dma =
2167                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2168                         /*
2169                          * Update RX descriptor with the physical address of the
2170                          * new data buffer of the new allocated mbuf.
2171                          */
2172                         rxe->mbuf = nmb;
2173
2174                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2175                         rxdp->read.hdr_addr = 0;
2176                         rxdp->read.pkt_addr = dma;
2177                 } else
2178                         rxe->mbuf = NULL;
2179
2180                 /*
2181                  * Set data length & data buffer address of mbuf.
2182                  */
2183                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2184                 rxm->data_len = data_len;
2185
2186                 if (!eop) {
2187                         uint16_t nextp_id;
2188                         /*
2189                          * Get next descriptor index:
2190                          *  - For RSC it's in the NEXTP field.
2191                          *  - For a scattered packet - it's just a following
2192                          *    descriptor.
2193                          */
2194                         if (ixgbe_rsc_count(&rxd))
2195                                 nextp_id =
2196                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2197                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2198                         else
2199                                 nextp_id = next_id;
2200
2201                         next_sc_entry = &sw_sc_ring[nextp_id];
2202                         next_rxe = &sw_ring[nextp_id];
2203                         rte_ixgbe_prefetch(next_rxe);
2204                 }
2205
2206                 sc_entry = &sw_sc_ring[rx_id];
2207                 first_seg = sc_entry->fbuf;
2208                 sc_entry->fbuf = NULL;
2209
2210                 /*
2211                  * If this is the first buffer of the received packet,
2212                  * set the pointer to the first mbuf of the packet and
2213                  * initialize its context.
2214                  * Otherwise, update the total length and the number of segments
2215                  * of the current scattered packet, and update the pointer to
2216                  * the last mbuf of the current packet.
2217                  */
2218                 if (first_seg == NULL) {
2219                         first_seg = rxm;
2220                         first_seg->pkt_len = data_len;
2221                         first_seg->nb_segs = 1;
2222                 } else {
2223                         first_seg->pkt_len += data_len;
2224                         first_seg->nb_segs++;
2225                 }
2226
2227                 prev_id = rx_id;
2228                 rx_id = next_id;
2229
2230                 /*
2231                  * If this is not the last buffer of the received packet, update
2232                  * the pointer to the first mbuf at the NEXTP entry in the
2233                  * sw_sc_ring and continue to parse the RX ring.
2234                  */
2235                 if (!eop && next_rxe) {
2236                         rxm->next = next_rxe->mbuf;
2237                         next_sc_entry->fbuf = first_seg;
2238                         goto next_desc;
2239                 }
2240
2241                 /* Initialize the first mbuf of the returned packet */
2242                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2243
2244                 /*
2245                  * Deal with the case when HW CRC strip is disabled.
2246                  * That can't happen when LRO is enabled, but it can still
2247                  * happen in scattered RX mode.
2248                  */
2249                 first_seg->pkt_len -= rxq->crc_len;
2250                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2251                         struct rte_mbuf *lp;
2252
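                             /*
                              * The CRC spilled entirely into the last segment:
                              * find the segment preceding rxm, trim the
                              * remaining CRC bytes from it, then unlink and
                              * free rxm.
                              */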
2253                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2254                                 ;
2255
2256                         first_seg->nb_segs--;
2257                         lp->data_len -= rxq->crc_len - rxm->data_len;
2258                         lp->next = NULL;
2259                         rte_pktmbuf_free_seg(rxm);
2260                 } else
2261                         rxm->data_len -= rxq->crc_len;
2262
2263                 /* Prefetch data of first segment, if configured to do so. */
2264                 rte_packet_prefetch((char *)first_seg->buf_addr +
2265                         first_seg->data_off);
2266
2267                 /*
2268                  * Store the mbuf address into the next entry of the array
2269                  * of returned packets.
2270                  */
2271                 rx_pkts[nb_rx++] = first_seg;
2272         }
2273
2274         /*
2275          * Record index of the next RX descriptor to probe.
2276          */
2277         rxq->rx_tail = rx_id;
2278
2279         /*
2280          * If the number of free RX descriptors is greater than the RX free
2281          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2282          * register.
2283          * Update the RDT with the value of the last processed RX descriptor
2284          * minus 1, to guarantee that the RDT register is never equal to the
2285          * RDH register, which creates a "full" ring situation from the
2286          * hardware point of view...
2287          */
2288         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2289                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2290                            "nb_hold=%u nb_rx=%u",
2291                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2292
2293                 rte_wmb();
2294                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2295                 nb_hold = 0;
2296         }
2297
2298         rxq->nb_rx_hold = nb_hold;
2299         return nb_rx;
2300 }
2301
2302 uint16_t
2303 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2304                                  uint16_t nb_pkts)
2305 {
2306         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2307 }
2308
2309 uint16_t
2310 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2311                                uint16_t nb_pkts)
2312 {
2313         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2314 }
2315
2316 /*********************************************************************
2317  *
2318  *  Queue management functions
2319  *
2320  **********************************************************************/
2321
2322 static void __rte_cold
2323 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2324 {
2325         unsigned i;
2326
2327         if (txq->sw_ring != NULL) {
2328                 for (i = 0; i < txq->nb_tx_desc; i++) {
2329                         if (txq->sw_ring[i].mbuf != NULL) {
2330                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2331                                 txq->sw_ring[i].mbuf = NULL;
2332                         }
2333                 }
2334         }
2335 }
2336
2337 static int
2338 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2339 {
2340         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2341         uint16_t i, tx_last, tx_id;
2342         uint16_t nb_tx_free_last;
2343         uint16_t nb_tx_to_clean;
2344         uint32_t pkt_cnt;
2345
2346         /* Start freeing mbufs from the entry following tx_tail */
2347         tx_last = txq->tx_tail;
2348         tx_id  = swr_ring[tx_last].next_id;
2349
2350         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2351                 return 0;
2352
2353         nb_tx_to_clean = txq->nb_tx_free;
2354         nb_tx_free_last = txq->nb_tx_free;
2355         if (!free_cnt)
2356                 free_cnt = txq->nb_tx_desc;
2357
2358         /* Loop through swr_ring to count the number of
2359          * freeable mbufs and packets.
2360          */
2361         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2362                 for (i = 0; i < nb_tx_to_clean &&
2363                         pkt_cnt < free_cnt &&
2364                         tx_id != tx_last; i++) {
2365                         if (swr_ring[tx_id].mbuf != NULL) {
2366                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2367                                 swr_ring[tx_id].mbuf = NULL;
2368
2369                                 /*
2370                                  * last segment in the packet,
2371                                  * increment packet count
2372                                  */
2373                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2374                         }
2375
2376                         tx_id = swr_ring[tx_id].next_id;
2377                 }
2378
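                     /*
                      * Stop when fewer than tx_rs_thresh descriptors remain in
                      * use (nothing left for ixgbe_xmit_cleanup() to reclaim)
                      * or when the scan has wrapped around to tx_last.
                      */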
2379                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2380                         txq->nb_tx_free || tx_id == tx_last)
2381                         break;
2382
2383                 if (pkt_cnt < free_cnt) {
2384                         if (ixgbe_xmit_cleanup(txq))
2385                                 break;
2386
2387                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2388                         nb_tx_free_last = txq->nb_tx_free;
2389                 }
2390         }
2391
2392         return (int)pkt_cnt;
2393 }
2394
2395 static int
2396 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2397                         uint32_t free_cnt)
2398 {
2399         int i, n, cnt;
2400
2401         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2402                 free_cnt = txq->nb_tx_desc;
2403
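             /*
              * ixgbe_tx_free_bufs() releases one tx_rs_thresh-sized batch per
              * call, so round free_cnt down to a multiple of tx_rs_thresh.
              */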
2404         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2405
2406         for (i = 0; i < cnt; i += n) {
2407                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2408                         break;
2409
2410                 n = ixgbe_tx_free_bufs(txq);
2411
2412                 if (n == 0)
2413                         break;
2414         }
2415
2416         return i;
2417 }
2418
2419 static int
2420 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2421                         uint32_t free_cnt __rte_unused)
2422 {
2423         return -ENOTSUP;
2424 }
2425
2426 int
2427 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2428 {
2429         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
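             /*
              * Pick the cleanup routine matching the Tx path selected in
              * ixgbe_set_tx_function(): vector, simple or full-featured.
              */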
2430         if (txq->offloads == 0 &&
2431 #ifdef RTE_LIB_SECURITY
2432                         !(txq->using_ipsec) &&
2433 #endif
2434                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2435                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2436                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2437                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2438                                         txq->sw_ring_v != NULL)) {
2439                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2440                 } else {
2441                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2442                 }
2443         }
2444
2445         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2446 }
2447
2448 static void __rte_cold
2449 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2450 {
2451         if (txq != NULL &&
2452             txq->sw_ring != NULL)
2453                 rte_free(txq->sw_ring);
2454 }
2455
2456 static void __rte_cold
2457 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2458 {
2459         if (txq != NULL && txq->ops != NULL) {
2460                 txq->ops->release_mbufs(txq);
2461                 txq->ops->free_swring(txq);
2462                 rte_free(txq);
2463         }
2464 }
2465
2466 void __rte_cold
2467 ixgbe_dev_tx_queue_release(void *txq)
2468 {
2469         ixgbe_tx_queue_release(txq);
2470 }
2471
2472 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2473 static void __rte_cold
2474 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2475 {
2476         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2477         struct ixgbe_tx_entry *txe = txq->sw_ring;
2478         uint16_t prev, i;
2479
2480         /* Zero out HW ring memory */
2481         for (i = 0; i < txq->nb_tx_desc; i++) {
2482                 txq->tx_ring[i] = zeroed_desc;
2483         }
2484
2485         /* Initialize SW ring entries */
2486         prev = (uint16_t) (txq->nb_tx_desc - 1);
2487         for (i = 0; i < txq->nb_tx_desc; i++) {
2488                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2489
2490                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2491                 txe[i].mbuf = NULL;
2492                 txe[i].last_id = i;
2493                 txe[prev].next_id = i;
2494                 prev = i;
2495         }
2496
2497         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2498         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2499
2500         txq->tx_tail = 0;
2501         txq->nb_tx_used = 0;
2502         /*
2503          * Always allow 1 descriptor to be un-allocated to avoid
2504          * a H/W race condition
2505          */
2506         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2507         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2508         txq->ctx_curr = 0;
2509         memset((void *)&txq->ctx_cache, 0,
2510                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2511 }
2512
2513 static const struct ixgbe_txq_ops def_txq_ops = {
2514         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2515         .free_swring = ixgbe_tx_free_swring,
2516         .reset = ixgbe_reset_tx_queue,
2517 };
2518
2519 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2520  * the queue parameters. Used in tx_queue_setup by primary process and then
2521  * in dev_init by secondary process when attaching to an existing ethdev.
2522  */
2523 void __rte_cold
2524 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2525 {
2526         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2527         if ((txq->offloads == 0) &&
2528 #ifdef RTE_LIB_SECURITY
2529                         !(txq->using_ipsec) &&
2530 #endif
2531                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2532                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2533                 dev->tx_pkt_prepare = NULL;
2534                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2535                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2536                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2537                                         ixgbe_txq_vec_setup(txq) == 0)) {
2538                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2539                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2540                 } else
2541                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2542         } else {
2543                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2544                 PMD_INIT_LOG(DEBUG,
2545                                 " - offloads = 0x%" PRIx64,
2546                                 txq->offloads);
2547                 PMD_INIT_LOG(DEBUG,
2548                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2549                                 (unsigned long)txq->tx_rs_thresh,
2550                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2551                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2552                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2553         }
2554 }
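/*
 * Selection summary (illustrative sketch only, mirroring the checks in
 * ixgbe_set_tx_function() above; the IPsec and secondary-process conditions
 * are omitted for brevity and this expression is not used by the driver):
 *
 *      use_simple = (txq->offloads == 0 &&
 *                    txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST);
 *      use_vector = use_simple &&
 *                   txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
 *                   rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
 *                   ixgbe_txq_vec_setup(txq) == 0;
 *      // otherwise the full-featured ixgbe_xmit_pkts()/ixgbe_prep_pkts() pair is used
 */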
2555
2556 uint64_t
2557 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2558 {
2559         RTE_SET_USED(dev);
2560
2561         return 0;
2562 }
2563
2564 uint64_t
2565 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2566 {
2567         uint64_t tx_offload_capa;
2568         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2569
2570         tx_offload_capa =
2571                 DEV_TX_OFFLOAD_VLAN_INSERT |
2572                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2573                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2574                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2575                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2576                 DEV_TX_OFFLOAD_TCP_TSO     |
2577                 DEV_TX_OFFLOAD_MULTI_SEGS;
2578
2579         if (hw->mac.type == ixgbe_mac_82599EB ||
2580             hw->mac.type == ixgbe_mac_X540)
2581                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2582
2583         if (hw->mac.type == ixgbe_mac_X550 ||
2584             hw->mac.type == ixgbe_mac_X550EM_x ||
2585             hw->mac.type == ixgbe_mac_X550EM_a)
2586                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2587
2588 #ifdef RTE_LIB_SECURITY
2589         if (dev->security_ctx)
2590                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2591 #endif
2592         return tx_offload_capa;
2593 }
2594
2595 int __rte_cold
2596 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2597                          uint16_t queue_idx,
2598                          uint16_t nb_desc,
2599                          unsigned int socket_id,
2600                          const struct rte_eth_txconf *tx_conf)
2601 {
2602         const struct rte_memzone *tz;
2603         struct ixgbe_tx_queue *txq;
2604         struct ixgbe_hw     *hw;
2605         uint16_t tx_rs_thresh, tx_free_thresh;
2606         uint64_t offloads;
2607
2608         PMD_INIT_FUNC_TRACE();
2609         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2610
2611         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2612
2613         /*
2614          * Validate number of transmit descriptors.
2615          * It must lie within the hardware minimum and maximum, and must be
2616          * a multiple of IXGBE_TXD_ALIGN.
2617          */
2618         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2619                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2620                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2621                 return -EINVAL;
2622         }
2623
2624         /*
2625          * The following two parameters control the setting of the RS bit on
2626          * transmit descriptors.
2627          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2628          * descriptors have been used.
2629          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2630          * descriptors are used or if the number of descriptors required
2631          * to transmit a packet is greater than the number of free TX
2632          * descriptors.
2633          * The following constraints must be satisfied:
2634          *  tx_rs_thresh must be greater than 0.
2635          *  tx_rs_thresh must be less than the size of the ring minus 2.
2636          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2637          *  tx_rs_thresh must be a divisor of the ring size.
2638          *  tx_free_thresh must be greater than 0.
2639          *  tx_free_thresh must be less than the size of the ring minus 3.
2640          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2641          * One descriptor in the TX ring is used as a sentinel to avoid a
2642          * H/W race condition, hence the maximum threshold constraints.
2643          * When set to zero use default values.
2644          */
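        /*
         * Worked example (illustrative numbers only): with nb_desc = 512,
         * tx_rs_thresh = 32 and tx_free_thresh = 32, all of the constraints
         * above hold:
         *      32 > 0, 32 < 510, 32 <= 32, 512 % 32 == 0,
         *      32 < 509, and 32 + 32 <= 512.
         * Passing 0 for either threshold in rte_eth_txconf selects the
         * defaults computed just below.
         */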
2645         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2646                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2647         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2648         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2649                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2650         if (tx_conf->tx_rs_thresh > 0)
2651                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2652         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2653                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2654                              "exceed nb_desc. (tx_rs_thresh=%u "
2655                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2656                              (unsigned int)tx_rs_thresh,
2657                              (unsigned int)tx_free_thresh,
2658                              (unsigned int)nb_desc,
2659                              (int)dev->data->port_id,
2660                              (int)queue_idx);
2661                 return -(EINVAL);
2662         }
2663         if (tx_rs_thresh >= (nb_desc - 2)) {
2664                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2665                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2666                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2667                         (int)dev->data->port_id, (int)queue_idx);
2668                 return -(EINVAL);
2669         }
2670         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2671                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2672                         "(tx_rs_thresh=%u port=%d queue=%d)",
2673                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2674                         (int)dev->data->port_id, (int)queue_idx);
2675                 return -(EINVAL);
2676         }
2677         if (tx_free_thresh >= (nb_desc - 3)) {
2678                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2679                              "number of TX descriptors minus 3. "
2680                              "(tx_free_thresh=%u "
2681                              "port=%d queue=%d)",
2682                              (unsigned int)tx_free_thresh,
2683                              (int)dev->data->port_id, (int)queue_idx);
2684                 return -(EINVAL);
2685         }
2686         if (tx_rs_thresh > tx_free_thresh) {
2687                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2688                              "tx_free_thresh. (tx_free_thresh=%u "
2689                              "tx_rs_thresh=%u port=%d queue=%d)",
2690                              (unsigned int)tx_free_thresh,
2691                              (unsigned int)tx_rs_thresh,
2692                              (int)dev->data->port_id,
2693                              (int)queue_idx);
2694                 return -(EINVAL);
2695         }
2696         if ((nb_desc % tx_rs_thresh) != 0) {
2697                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2698                              "number of TX descriptors. (tx_rs_thresh=%u "
2699                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2700                              (int)dev->data->port_id, (int)queue_idx);
2701                 return -(EINVAL);
2702         }
2703
2704         /*
2705          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2706          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2707          * by the NIC and all descriptors are written back after the NIC
2708          * accumulates WTHRESH descriptors.
2709          */
2710         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2711                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2712                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2713                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2714                              (int)dev->data->port_id, (int)queue_idx);
2715                 return -(EINVAL);
2716         }
2717
2718         /* Free memory prior to re-allocation if needed... */
2719         if (dev->data->tx_queues[queue_idx] != NULL) {
2720                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2721                 dev->data->tx_queues[queue_idx] = NULL;
2722         }
2723
2724         /* First allocate the tx queue data structure */
2725         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2726                                  RTE_CACHE_LINE_SIZE, socket_id);
2727         if (txq == NULL)
2728                 return -ENOMEM;
2729
2730         /*
2731          * Allocate TX ring hardware descriptors. A memzone large enough to
2732          * handle the maximum ring size is allocated in order to allow for
2733          * resizing in later calls to the queue setup function.
2734          */
2735         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2736                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2737                         IXGBE_ALIGN, socket_id);
2738         if (tz == NULL) {
2739                 ixgbe_tx_queue_release(txq);
2740                 return -ENOMEM;
2741         }
2742
2743         txq->nb_tx_desc = nb_desc;
2744         txq->tx_rs_thresh = tx_rs_thresh;
2745         txq->tx_free_thresh = tx_free_thresh;
2746         txq->pthresh = tx_conf->tx_thresh.pthresh;
2747         txq->hthresh = tx_conf->tx_thresh.hthresh;
2748         txq->wthresh = tx_conf->tx_thresh.wthresh;
2749         txq->queue_id = queue_idx;
2750         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2751                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2752         txq->port_id = dev->data->port_id;
2753         txq->offloads = offloads;
2754         txq->ops = &def_txq_ops;
2755         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2756 #ifdef RTE_LIB_SECURITY
2757         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2758                         DEV_TX_OFFLOAD_SECURITY);
2759 #endif
2760
2761         /*
2762          * Use the VFTDT tail register on virtual functions, TDT otherwise.
2763          */
2764         if (hw->mac.type == ixgbe_mac_82599_vf ||
2765             hw->mac.type == ixgbe_mac_X540_vf ||
2766             hw->mac.type == ixgbe_mac_X550_vf ||
2767             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2768             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2769                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2770         else
2771                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2772
2773         txq->tx_ring_phys_addr = tz->iova;
2774         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2775
2776         /* Allocate software ring */
2777         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2778                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2779                                 RTE_CACHE_LINE_SIZE, socket_id);
2780         if (txq->sw_ring == NULL) {
2781                 ixgbe_tx_queue_release(txq);
2782                 return -ENOMEM;
2783         }
2784         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2785                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2786
2787         /* set up vector or scalar TX function as appropriate */
2788         ixgbe_set_tx_function(dev, txq);
2789
2790         txq->ops->reset(txq);
2791
2792         dev->data->tx_queues[queue_idx] = txq;
2793
2794
2795         return 0;
2796 }
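/*
 * Illustrative application-side sketch (not part of the driver): setting up
 * a Tx queue through the generic ethdev API, which ends up in
 * ixgbe_dev_tx_queue_setup() above. The threshold values are hypothetical
 * but satisfy the constraints checked by this function for a 512-entry ring.
 *
 *      #include <rte_ethdev.h>
 *
 *      static int
 *      setup_tx_queue(uint16_t port_id, uint16_t queue_id)
 *      {
 *              struct rte_eth_txconf txconf = {
 *                      .tx_rs_thresh = 32,
 *                      .tx_free_thresh = 32,
 *                      .offloads = 0,          // keeps the simple/vector Tx path eligible
 *              };
 *
 *              return rte_eth_tx_queue_setup(port_id, queue_id, 512,
 *                                            rte_eth_dev_socket_id(port_id),
 *                                            &txconf);
 *      }
 */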
2797
2798 /**
2799  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2800  *
2801  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2802  * in the sw_rsc_ring is not set to NULL but rather points to the next
2803  * mbuf of this RSC aggregation (that has not been completed yet and still
2804  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2805  * just free the first "nb_segs" segments of the cluster explicitly with
2806  * rte_pktmbuf_free_seg().
2807  *
2808  * @m scattered cluster head
2809  */
2810 static void __rte_cold
2811 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2812 {
2813         uint16_t i, nb_segs = m->nb_segs;
2814         struct rte_mbuf *next_seg;
2815
2816         for (i = 0; i < nb_segs; i++) {
2817                 next_seg = m->next;
2818                 rte_pktmbuf_free_seg(m);
2819                 m = next_seg;
2820         }
2821 }
2822
2823 static void __rte_cold
2824 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2825 {
2826         unsigned i;
2827
2828         /* SSE Vector driver has a different way of releasing mbufs. */
2829         if (rxq->rx_using_sse) {
2830                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2831                 return;
2832         }
2833
2834         if (rxq->sw_ring != NULL) {
2835                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2836                         if (rxq->sw_ring[i].mbuf != NULL) {
2837                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2838                                 rxq->sw_ring[i].mbuf = NULL;
2839                         }
2840                 }
2841                 if (rxq->rx_nb_avail) {
2842                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2843                                 struct rte_mbuf *mb;
2844
2845                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2846                                 rte_pktmbuf_free_seg(mb);
2847                         }
2848                         rxq->rx_nb_avail = 0;
2849                 }
2850         }
2851
2852         if (rxq->sw_sc_ring)
2853                 for (i = 0; i < rxq->nb_rx_desc; i++)
2854                         if (rxq->sw_sc_ring[i].fbuf) {
2855                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2856                                 rxq->sw_sc_ring[i].fbuf = NULL;
2857                         }
2858 }
2859
2860 static void __rte_cold
2861 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2862 {
2863         if (rxq != NULL) {
2864                 ixgbe_rx_queue_release_mbufs(rxq);
2865                 rte_free(rxq->sw_ring);
2866                 rte_free(rxq->sw_sc_ring);
2867                 rte_free(rxq);
2868         }
2869 }
2870
2871 void __rte_cold
2872 ixgbe_dev_rx_queue_release(void *rxq)
2873 {
2874         ixgbe_rx_queue_release(rxq);
2875 }
2876
2877 /*
2878  * Check if Rx Burst Bulk Alloc function can be used.
2879  * Return
2880  *        0: the preconditions are satisfied and the bulk allocation function
2881  *           can be used.
2882  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2883  *           function must be used.
2884  */
2885 static inline int __rte_cold
2886 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2887 {
2888         int ret = 0;
2889
2890         /*
2891          * Make sure the following pre-conditions are satisfied:
2892          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2893          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2894          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2895          * Scattered packets are not supported.  This should be checked
2896          * outside of this function.
2897          */
2898         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2899                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2900                              "rxq->rx_free_thresh=%d, "
2901                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2902                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2903                 ret = -EINVAL;
2904         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2905                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2906                              "rxq->rx_free_thresh=%d, "
2907                              "rxq->nb_rx_desc=%d",
2908                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2909                 ret = -EINVAL;
2910         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2911                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2912                              "rxq->nb_rx_desc=%d, "
2913                              "rxq->rx_free_thresh=%d",
2914                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2915                 ret = -EINVAL;
2916         }
2917
2918         return ret;
2919 }
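/*
 * Worked example (illustrative, assuming the build-time default
 * RTE_PMD_IXGBE_RX_MAX_BURST of 32): a queue configured with
 * nb_rx_desc = 512 and rx_free_thresh = 32 satisfies all three
 * preconditions (32 >= 32, 32 < 512, 512 % 32 == 0), so the bulk
 * allocation receive path stays enabled for the port; rx_free_thresh = 16
 * would fail the first check and disable it for the whole port.
 */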
2920
2921 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2922 static void __rte_cold
2923 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2924 {
2925         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2926         unsigned i;
2927         uint16_t len = rxq->nb_rx_desc;
2928
2929         /*
2930          * By default, the Rx queue setup function allocates enough memory for
2931          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2932          * extra memory at the end of the descriptor ring to be zeroed out.
2933          */
2934         if (adapter->rx_bulk_alloc_allowed)
2935                 /* zero out extra memory */
2936                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2937
2938         /*
2939          * Zero out HW ring memory. Zero out extra memory at the end of
2940          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2941          * reads extra memory as zeros.
2942          */
2943         for (i = 0; i < len; i++) {
2944                 rxq->rx_ring[i] = zeroed_desc;
2945         }
2946
2947         /*
2948          * initialize extra software ring entries. Space for these extra
2949          * entries is always allocated
2950          */
2951         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2952         for (i = rxq->nb_rx_desc; i < len; ++i) {
2953                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2954         }
2955
2956         rxq->rx_nb_avail = 0;
2957         rxq->rx_next_avail = 0;
2958         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2959         rxq->rx_tail = 0;
2960         rxq->nb_rx_hold = 0;
2961         rxq->pkt_first_seg = NULL;
2962         rxq->pkt_last_seg = NULL;
2963
2964 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2965         rxq->rxrearm_start = 0;
2966         rxq->rxrearm_nb = 0;
2967 #endif
2968 }
2969
2970 static int
2971 ixgbe_is_vf(struct rte_eth_dev *dev)
2972 {
2973         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2974
2975         switch (hw->mac.type) {
2976         case ixgbe_mac_82599_vf:
2977         case ixgbe_mac_X540_vf:
2978         case ixgbe_mac_X550_vf:
2979         case ixgbe_mac_X550EM_x_vf:
2980         case ixgbe_mac_X550EM_a_vf:
2981                 return 1;
2982         default:
2983                 return 0;
2984         }
2985 }
2986
2987 uint64_t
2988 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2989 {
2990         uint64_t offloads = 0;
2991         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2992
2993         if (hw->mac.type != ixgbe_mac_82598EB)
2994                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2995
2996         return offloads;
2997 }
2998
2999 uint64_t
3000 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3001 {
3002         uint64_t offloads;
3003         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3004
3005         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
3006                    DEV_RX_OFFLOAD_UDP_CKSUM   |
3007                    DEV_RX_OFFLOAD_TCP_CKSUM   |
3008                    DEV_RX_OFFLOAD_KEEP_CRC    |
3009                    DEV_RX_OFFLOAD_JUMBO_FRAME |
3010                    DEV_RX_OFFLOAD_VLAN_FILTER |
3011                    DEV_RX_OFFLOAD_SCATTER |
3012                    DEV_RX_OFFLOAD_RSS_HASH;
3013
3014         if (hw->mac.type == ixgbe_mac_82598EB)
3015                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3016
3017         if (ixgbe_is_vf(dev) == 0)
3018                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
3019
3020         /*
3021          * RSC is only supported by 82599, X540 and X550 PF devices in a
3022          * non-SR-IOV mode.
3023          */
3024         if ((hw->mac.type == ixgbe_mac_82599EB ||
3025              hw->mac.type == ixgbe_mac_X540 ||
3026              hw->mac.type == ixgbe_mac_X550) &&
3027             !RTE_ETH_DEV_SRIOV(dev).active)
3028                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3029
3030         if (hw->mac.type == ixgbe_mac_82599EB ||
3031             hw->mac.type == ixgbe_mac_X540)
3032                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3033
3034         if (hw->mac.type == ixgbe_mac_X550 ||
3035             hw->mac.type == ixgbe_mac_X550EM_x ||
3036             hw->mac.type == ixgbe_mac_X550EM_a)
3037                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3038
3039 #ifdef RTE_LIB_SECURITY
3040         if (dev->security_ctx)
3041                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3042 #endif
3043
3044         return offloads;
3045 }
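/*
 * Illustrative application-side sketch (not part of the driver): the
 * capabilities reported above surface through rte_eth_dev_info_get(), so an
 * application can probe for an offload such as TCP LRO before enabling it.
 *
 *      #include <rte_ethdev.h>
 *
 *      static int
 *      port_supports_lro(uint16_t port_id)
 *      {
 *              struct rte_eth_dev_info dev_info;
 *
 *              if (rte_eth_dev_info_get(port_id, &dev_info) != 0)
 *                      return 0;
 *
 *              return (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) != 0;
 *      }
 */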
3046
3047 int __rte_cold
3048 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3049                          uint16_t queue_idx,
3050                          uint16_t nb_desc,
3051                          unsigned int socket_id,
3052                          const struct rte_eth_rxconf *rx_conf,
3053                          struct rte_mempool *mp)
3054 {
3055         const struct rte_memzone *rz;
3056         struct ixgbe_rx_queue *rxq;
3057         struct ixgbe_hw     *hw;
3058         uint16_t len;
3059         struct ixgbe_adapter *adapter = dev->data->dev_private;
3060         uint64_t offloads;
3061
3062         PMD_INIT_FUNC_TRACE();
3063         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3064
3065         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3066
3067         /*
3068          * Validate number of receive descriptors.
3069          * It must lie within the hardware minimum and maximum, and must be
3070          * a multiple of IXGBE_RXD_ALIGN.
3071          */
3072         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3073                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3074                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3075                 return -EINVAL;
3076         }
3077
3078         /* Free memory prior to re-allocation if needed... */
3079         if (dev->data->rx_queues[queue_idx] != NULL) {
3080                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3081                 dev->data->rx_queues[queue_idx] = NULL;
3082         }
3083
3084         /* First allocate the rx queue data structure */
3085         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3086                                  RTE_CACHE_LINE_SIZE, socket_id);
3087         if (rxq == NULL)
3088                 return -ENOMEM;
3089         rxq->mb_pool = mp;
3090         rxq->nb_rx_desc = nb_desc;
3091         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3092         rxq->queue_id = queue_idx;
3093         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3094                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3095         rxq->port_id = dev->data->port_id;
3096         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3097                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3098         else
3099                 rxq->crc_len = 0;
3100         rxq->drop_en = rx_conf->rx_drop_en;
3101         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3102         rxq->offloads = offloads;
3103
3104         /*
3105          * The packet type field in the RX descriptor differs between NICs:
3106          * some bits are used by the x550 family but reserved on other NICs,
3107          * so select the packet type mask per NIC type.
3108          */
3109         if (hw->mac.type == ixgbe_mac_X550 ||
3110             hw->mac.type == ixgbe_mac_X550EM_x ||
3111             hw->mac.type == ixgbe_mac_X550EM_a ||
3112             hw->mac.type == ixgbe_mac_X550_vf ||
3113             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3114             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3115                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3116         else
3117                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3118
3119         /*
3120          * Allocate RX ring hardware descriptors. A memzone large enough to
3121          * handle the maximum ring size is allocated in order to allow for
3122          * resizing in later calls to the queue setup function.
3123          */
3124         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3125                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3126         if (rz == NULL) {
3127                 ixgbe_rx_queue_release(rxq);
3128                 return -ENOMEM;
3129         }
3130
3131         /*
3132          * Zero init all the descriptors in the ring.
3133          */
3134         memset(rz->addr, 0, RX_RING_SZ);
3135
3136         /*
3137          * Use the VFRDT/VFRDH registers on virtual functions, RDT/RDH otherwise.
3138          */
3139         if (hw->mac.type == ixgbe_mac_82599_vf ||
3140             hw->mac.type == ixgbe_mac_X540_vf ||
3141             hw->mac.type == ixgbe_mac_X550_vf ||
3142             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3143             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3144                 rxq->rdt_reg_addr =
3145                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3146                 rxq->rdh_reg_addr =
3147                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3148         } else {
3149                 rxq->rdt_reg_addr =
3150                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3151                 rxq->rdh_reg_addr =
3152                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3153         }
3154
3155         rxq->rx_ring_phys_addr = rz->iova;
3156         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3157
3158         /*
3159          * Certain constraints must be met in order to use the bulk buffer
3160          * allocation Rx burst function. If any Rx queue does not meet them,
3161          * the feature is disabled for the whole port.
3162          */
3163         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3164                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3165                                     "preconditions - canceling the feature for "
3166                                     "the whole port[%d]",
3167                              rxq->queue_id, rxq->port_id);
3168                 adapter->rx_bulk_alloc_allowed = false;
3169         }
3170
3171         /*
3172          * Allocate software ring. Allow for space at the end of the
3173          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3174          * function does not access an invalid memory region.
3175          */
3176         len = nb_desc;
3177         if (adapter->rx_bulk_alloc_allowed)
3178                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3179
3180         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3181                                           sizeof(struct ixgbe_rx_entry) * len,
3182                                           RTE_CACHE_LINE_SIZE, socket_id);
3183         if (!rxq->sw_ring) {
3184                 ixgbe_rx_queue_release(rxq);
3185                 return -ENOMEM;
3186         }
3187
3188         /*
3189          * Always allocate even if it's not going to be needed in order to
3190          * simplify the code.
3191          *
3192          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3193          * be requested in ixgbe_dev_rx_init(), which is called later from
3194          * dev_start() flow.
3195          */
3196         rxq->sw_sc_ring =
3197                 rte_zmalloc_socket("rxq->sw_sc_ring",
3198                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3199                                    RTE_CACHE_LINE_SIZE, socket_id);
3200         if (!rxq->sw_sc_ring) {
3201                 ixgbe_rx_queue_release(rxq);
3202                 return -ENOMEM;
3203         }
3204
3205         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3206                             "dma_addr=0x%"PRIx64,
3207                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3208                      rxq->rx_ring_phys_addr);
3209
3210         if (!rte_is_power_of_2(nb_desc)) {
3211                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3212                                     "preconditions - canceling the feature for "
3213                                     "the whole port[%d]",
3214                              rxq->queue_id, rxq->port_id);
3215                 adapter->rx_vec_allowed = false;
3216         } else
3217                 ixgbe_rxq_vec_setup(rxq);
3218
3219         dev->data->rx_queues[queue_idx] = rxq;
3220
3221         ixgbe_reset_rx_queue(adapter, rxq);
3222
3223         return 0;
3224 }
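/*
 * Illustrative application-side sketch (not part of the driver): an Rx queue
 * setup call that reaches ixgbe_dev_rx_queue_setup() above. The ring size is
 * a power of two and rx_free_thresh divides it, so both the vector and the
 * bulk-allocation receive paths can remain enabled. All identifiers and
 * values are hypothetical.
 *
 *      #include <rte_ethdev.h>
 *      #include <rte_mbuf.h>
 *
 *      static int
 *      setup_rx_queue(uint16_t port_id, uint16_t queue_id,
 *                     struct rte_mempool *mb_pool)
 *      {
 *              struct rte_eth_rxconf rxconf = {
 *                      .rx_free_thresh = 32,
 *              };
 *
 *              return rte_eth_rx_queue_setup(port_id, queue_id, 512,
 *                                            rte_eth_dev_socket_id(port_id),
 *                                            &rxconf, mb_pool);
 *      }
 */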
3225
3226 uint32_t
3227 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3228 {
3229 #define IXGBE_RXQ_SCAN_INTERVAL 4
3230         volatile union ixgbe_adv_rx_desc *rxdp;
3231         struct ixgbe_rx_queue *rxq;
3232         uint32_t desc = 0;
3233
3234         rxq = dev->data->rx_queues[rx_queue_id];
3235         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3236
3237         while ((desc < rxq->nb_rx_desc) &&
3238                 (rxdp->wb.upper.status_error &
3239                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3240                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3241                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3242                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3243                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3244                                 desc - rxq->nb_rx_desc]);
3245         }
3246
3247         return desc;
3248 }
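/*
 * Note on granularity (illustrative): the scan above advances in steps of
 * IXGBE_RXQ_SCAN_INTERVAL (4) descriptors, so the value returned through
 * rte_eth_rx_queue_count() is an approximation in multiples of 4, not an
 * exact count of ready descriptors.
 *
 *      // application side, hypothetical identifiers
 *      int used = rte_eth_rx_queue_count(port_id, queue_id);
 */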
3249
3250 int
3251 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3252 {
3253         volatile union ixgbe_adv_rx_desc *rxdp;
3254         struct ixgbe_rx_queue *rxq = rx_queue;
3255         uint32_t desc;
3256
3257         if (unlikely(offset >= rxq->nb_rx_desc))
3258                 return 0;
3259         desc = rxq->rx_tail + offset;
3260         if (desc >= rxq->nb_rx_desc)
3261                 desc -= rxq->nb_rx_desc;
3262
3263         rxdp = &rxq->rx_ring[desc];
3264         return !!(rxdp->wb.upper.status_error &
3265                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3266 }
3267
3268 int
3269 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3270 {
3271         struct ixgbe_rx_queue *rxq = rx_queue;
3272         volatile uint32_t *status;
3273         uint32_t nb_hold, desc;
3274
3275         if (unlikely(offset >= rxq->nb_rx_desc))
3276                 return -EINVAL;
3277
3278 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3279         if (rxq->rx_using_sse)
3280                 nb_hold = rxq->rxrearm_nb;
3281         else
3282 #endif
3283                 nb_hold = rxq->nb_rx_hold;
3284         if (offset >= rxq->nb_rx_desc - nb_hold)
3285                 return RTE_ETH_RX_DESC_UNAVAIL;
3286
3287         desc = rxq->rx_tail + offset;
3288         if (desc >= rxq->nb_rx_desc)
3289                 desc -= rxq->nb_rx_desc;
3290
3291         status = &rxq->rx_ring[desc].wb.upper.status_error;
3292         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3293                 return RTE_ETH_RX_DESC_DONE;
3294
3295         return RTE_ETH_RX_DESC_AVAIL;
3296 }
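/*
 * Illustrative usage sketch (application side, not part of the driver):
 * the callback above backs rte_eth_rx_descriptor_status(), which lets an
 * application check how far ahead of the software tail the NIC has already
 * written packets. Identifiers below are hypothetical.
 *
 *      #include <rte_ethdev.h>
 *
 *      static int
 *      rx_backlog_at_least(uint16_t port_id, uint16_t queue_id, uint16_t n)
 *      {
 *              // DONE means the descriptor at this offset has been filled
 *              return rte_eth_rx_descriptor_status(port_id, queue_id, n) ==
 *                      RTE_ETH_RX_DESC_DONE;
 *      }
 */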
3297
3298 int
3299 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3300 {
3301         struct ixgbe_tx_queue *txq = tx_queue;
3302         volatile uint32_t *status;
3303         uint32_t desc;
3304
3305         if (unlikely(offset >= txq->nb_tx_desc))
3306                 return -EINVAL;
3307
3308         desc = txq->tx_tail + offset;
3309         /* go to next desc that has the RS bit */
3310         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3311                 txq->tx_rs_thresh;
3312         if (desc >= txq->nb_tx_desc) {
3313                 desc -= txq->nb_tx_desc;
3314                 if (desc >= txq->nb_tx_desc)
3315                         desc -= txq->nb_tx_desc;
3316         }
3317
3318         status = &txq->tx_ring[desc].wb.status;
3319         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3320                 return RTE_ETH_TX_DESC_DONE;
3321
3322         return RTE_ETH_TX_DESC_FULL;
3323 }
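/*
 * Worked example for the RS-bit rounding above (illustrative numbers only):
 * with tx_rs_thresh = 32, tx_tail = 100 and offset = 10, the raw index is
 * 110, which rounds up to the next descriptor carrying the RS bit:
 *      ((110 + 32 - 1) / 32) * 32 = 128.
 * The DD bit is therefore sampled on descriptor 128; only once that
 * descriptor has been written back is the queried slot reported as
 * RTE_ETH_TX_DESC_DONE through rte_eth_tx_descriptor_status().
 */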
3324
3325 /*
3326  * Set up link loopback for X540/X550 mode Tx->Rx.
3327  */
3328 static inline void __rte_cold
3329 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3330 {
3331         uint32_t macc;
3332         PMD_INIT_FUNC_TRACE();
3333
3334         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3335
3336         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3337                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3338         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3339
3340         if (enable) {
3341                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3342                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3343                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3344                 macc |= IXGBE_MACC_FLU;
3345         } else {
3346                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3347                 macc &= ~IXGBE_MACC_FLU;
3348         }
3349
3350         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3351                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3352
3353         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3354 }
3355
3356 void __rte_cold
3357 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3358 {
3359         unsigned i;
3360         struct ixgbe_adapter *adapter = dev->data->dev_private;
3361         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3362
3363         PMD_INIT_FUNC_TRACE();
3364
3365         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3366                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3367
3368                 if (txq != NULL) {
3369                         txq->ops->release_mbufs(txq);
3370                         txq->ops->reset(txq);
3371                 }
3372         }
3373
3374         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3375                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3376
3377                 if (rxq != NULL) {
3378                         ixgbe_rx_queue_release_mbufs(rxq);
3379                         ixgbe_reset_rx_queue(adapter, rxq);
3380                 }
3381         }
3382         /* If loopback mode was enabled, reconfigure the link accordingly */
3383         if (dev->data->dev_conf.lpbk_mode != 0) {
3384                 if (hw->mac.type == ixgbe_mac_X540 ||
3385                      hw->mac.type == ixgbe_mac_X550 ||
3386                      hw->mac.type == ixgbe_mac_X550EM_x ||
3387                      hw->mac.type == ixgbe_mac_X550EM_a)
3388                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3389         }
3390 }
3391
3392 void
3393 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3394 {
3395         unsigned i;
3396
3397         PMD_INIT_FUNC_TRACE();
3398
3399         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3400                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3401                 dev->data->rx_queues[i] = NULL;
3402                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3403         }
3404         dev->data->nb_rx_queues = 0;
3405
3406         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3407                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3408                 dev->data->tx_queues[i] = NULL;
3409                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3410         }
3411         dev->data->nb_tx_queues = 0;
3412 }
3413
3414 /*********************************************************************
3415  *
3416  *  Device RX/TX init functions
3417  *
3418  **********************************************************************/
3419
3420 /**
3421  * Receive Side Scaling (RSS)
3422  * See section 7.1.2.8 in the following document:
3423  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3424  *
3425  * Principles:
3426  * The source and destination IP addresses of the IP header and the source
3427  * and destination ports of TCP/UDP headers, if any, of received packets are
3428  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3429  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3430  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3431  * RSS output index which is used as the RX queue index where to store the
3432  * received packets.
3433  * The following output is supplied in the RX write-back descriptor:
3434  *     - 32-bit result of the Microsoft RSS hash function,
3435  *     - 4-bit RSS type field.
3436  */
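/*
 * Illustrative sketch of the lookup described above (not driver code): the
 * Rx queue for a received packet is derived from the low 7 bits of the RSS
 * hash, used as an index into the 128-entry redirection table.
 *
 *      uint8_t reta[128];              // redirection table, one queue per entry
 *      uint16_t rx_queue = reta[rss_hash & 0x7F];
 */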
3437
3438 /*
3439  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3440  * Used as the default key.
3441  */
3442 static uint8_t rss_intel_key[40] = {
3443         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3444         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3445         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3446         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3447         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3448 };
3449
3450 static void
3451 ixgbe_rss_disable(struct rte_eth_dev *dev)
3452 {
3453         struct ixgbe_hw *hw;
3454         uint32_t mrqc;
3455         uint32_t mrqc_reg;
3456
3457         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3458         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3459         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3460         mrqc &= ~IXGBE_MRQC_RSSEN;
3461         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3462 }
3463
3464 static void
3465 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3466 {
3467         uint8_t  *hash_key;
3468         uint32_t mrqc;
3469         uint32_t rss_key;
3470         uint64_t rss_hf;
3471         uint16_t i;
3472         uint32_t mrqc_reg;
3473         uint32_t rssrk_reg;
3474
3475         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3476         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3477
3478         hash_key = rss_conf->rss_key;
3479         if (hash_key != NULL) {
3480                 /* Fill in RSS hash key */
3481                 for (i = 0; i < 10; i++) {
3482                         rss_key  = hash_key[(i * 4)];
3483                         rss_key |= hash_key[(i * 4) + 1] << 8;
3484                         rss_key |= hash_key[(i * 4) + 2] << 16;
3485                         rss_key |= hash_key[(i * 4) + 3] << 24;
3486                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3487                 }
3488         }
3489
3490         /* Set configured hashing protocols in MRQC register */
3491         rss_hf = rss_conf->rss_hf;
3492         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3493         if (rss_hf & ETH_RSS_IPV4)
3494                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3495         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3496                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3497         if (rss_hf & ETH_RSS_IPV6)
3498                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3499         if (rss_hf & ETH_RSS_IPV6_EX)
3500                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3501         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3502                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3503         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3504                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3505         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3506                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3507         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3508                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3509         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3510                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3511         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3512 }
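/*
 * Illustrative application-side sketch (not part of the driver): the hash key
 * and protocol selection programmed above come from
 * rte_eth_dev_rss_hash_update(), e.g. with a caller-supplied 40-byte key
 * (matching the 10 RSSRK registers filled above). Identifiers are hypothetical.
 *
 *      #include <rte_ethdev.h>
 *
 *      static int
 *      enable_ip_tcp_rss(uint16_t port_id, uint8_t key[40])
 *      {
 *              struct rte_eth_rss_conf rss_conf = {
 *                      .rss_key = key,
 *                      .rss_key_len = 40,
 *                      .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *              };
 *
 *              return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
 *      }
 */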
3513
3514 int
3515 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3516                           struct rte_eth_rss_conf *rss_conf)
3517 {
3518         struct ixgbe_hw *hw;
3519         uint32_t mrqc;
3520         uint64_t rss_hf;
3521         uint32_t mrqc_reg;
3522
3523         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3524
3525         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3526                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3527                         "NIC.");
3528                 return -ENOTSUP;
3529         }
3530         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3531
3532         /*
3533          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3534          *     "RSS enabling cannot be done dynamically while it must be
3535          *      preceded by a software reset"
3536          * Before changing anything, first check that the update RSS operation
3537          * does not attempt to disable RSS, if RSS was enabled at
3538          * initialization time, or does not attempt to enable RSS, if RSS was
3539          * disabled at initialization time.
3540          */
3541         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3542         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3543         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3544                 if (rss_hf != 0) /* Enable RSS */
3545                         return -(EINVAL);
3546                 return 0; /* Nothing to do */
3547         }
3548         /* RSS enabled */
3549         if (rss_hf == 0) /* Disable RSS */
3550                 return -(EINVAL);
3551         ixgbe_hw_rss_hash_set(hw, rss_conf);
3552         return 0;
3553 }
3554
3555 int
3556 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3557                             struct rte_eth_rss_conf *rss_conf)
3558 {
3559         struct ixgbe_hw *hw;
3560         uint8_t *hash_key;
3561         uint32_t mrqc;
3562         uint32_t rss_key;
3563         uint64_t rss_hf;
3564         uint16_t i;
3565         uint32_t mrqc_reg;
3566         uint32_t rssrk_reg;
3567
3568         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3569         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3570         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3571         hash_key = rss_conf->rss_key;
3572         if (hash_key != NULL) {
3573                 /* Return RSS hash key */
3574                 for (i = 0; i < 10; i++) {
3575                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3576                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3577                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3578                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3579                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3580                 }
3581         }
3582
3583         /* Get RSS functions configured in MRQC register */
3584         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3585         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3586                 rss_conf->rss_hf = 0;
3587                 return 0;
3588         }
3589         rss_hf = 0;
3590         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3591                 rss_hf |= ETH_RSS_IPV4;
3592         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3593                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3594         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3595                 rss_hf |= ETH_RSS_IPV6;
3596         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3597                 rss_hf |= ETH_RSS_IPV6_EX;
3598         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3599                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3600         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3601                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3602         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3603                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3604         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3605                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3606         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3607                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3608         rss_conf->rss_hf = rss_hf;
3609         return 0;
3610 }
3611
3612 static void
3613 ixgbe_rss_configure(struct rte_eth_dev *dev)
3614 {
3615         struct rte_eth_rss_conf rss_conf;
3616         struct ixgbe_adapter *adapter;
3617         struct ixgbe_hw *hw;
3618         uint32_t reta;
3619         uint16_t i;
3620         uint16_t j;
3621         uint16_t sp_reta_size;
3622         uint32_t reta_reg;
3623
3624         PMD_INIT_FUNC_TRACE();
3625         adapter = dev->data->dev_private;
3626         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3627
3628         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3629
3630         /*
3631          * Fill in redirection table
3632          * The byte-swap is needed because NIC registers are in
3633          * little-endian order.
3634          */
3635         if (adapter->rss_reta_updated == 0) {
3636                 reta = 0;
3637                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3638                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3639
3640                         if (j == dev->data->nb_rx_queues)
3641                                 j = 0;
3642                         reta = (reta << 8) | j;
3643                         if ((i & 3) == 3)
3644                                 IXGBE_WRITE_REG(hw, reta_reg,
3645                                                 rte_bswap32(reta));
3646                 }
3647         }
3648
3649         /*
3650          * Configure the RSS key and the RSS protocols used to compute
3651          * the RSS hash of input packets.
3652          */
3653         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3654         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3655                 ixgbe_rss_disable(dev);
3656                 return;
3657         }
3658         if (rss_conf.rss_key == NULL)
3659                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3660         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3661 }
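/*
 * Worked example of the RETA fill above (illustrative only): with
 * nb_rx_queues = 4, the table entries cycle 0, 1, 2, 3, 0, 1, ... Every
 * four entries are packed into one 32-bit register and byte-swapped, so the
 * first RETA register is written as rte_bswap32(0x00010203) = 0x03020100,
 * i.e. entry 0 ends up in the least significant byte.
 */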
3662
3663 #define NUM_VFTA_REGISTERS 128
3664 #define NIC_RX_BUFFER_SIZE 0x200
3665 #define X550_RX_BUFFER_SIZE 0x180
3666
3667 static void
3668 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3669 {
3670         struct rte_eth_vmdq_dcb_conf *cfg;
3671         struct ixgbe_hw *hw;
3672         enum rte_eth_nb_pools num_pools;
3673         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3674         uint16_t pbsize;
3675         uint8_t nb_tcs; /* number of traffic classes */
3676         int i;
3677
3678         PMD_INIT_FUNC_TRACE();
3679         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3680         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3681         num_pools = cfg->nb_queue_pools;
3682         /* Check we have a valid number of pools */
3683         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3684                 ixgbe_rss_disable(dev);
3685                 return;
3686         }
3687         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3688         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3689
3690         /*
3691          * RXPBSIZE
3692          * split rx buffer up into sections, each for 1 traffic class
3693          */
3694         switch (hw->mac.type) {
3695         case ixgbe_mac_X550:
3696         case ixgbe_mac_X550EM_x:
3697         case ixgbe_mac_X550EM_a:
3698                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3699                 break;
3700         default:
3701                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3702                 break;
3703         }
3704         for (i = 0; i < nb_tcs; i++) {
3705                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3706
3707                 /* clear the 10-bit size field, then set the per-TC value */
3708                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3709                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT);
3710                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3711         }
3712         /* zero alloc all unused TCs */
3713         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3714                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3715
3716                 /* clear the 10-bit size field (no buffer for unused TCs) */
3717                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3718                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3719         }
3720
3721         /* MRQC: enable vmdq and dcb */
3722         mrqc = (num_pools == ETH_16_POOLS) ?
3723                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3724         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3725
3726         /* PFVTCTL: turn on virtualisation and set the default pool */
3727         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3728         if (cfg->enable_default_pool) {
3729                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3730         } else {
3731                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3732         }
3733
3734         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3735
3736         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3737         queue_mapping = 0;
3738         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3739                 /*
3740                  * mapping is done with 3 bits per priority,
3741                  * so shift by i*3 each time
3742                  */
3743                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3744
3745         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
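        /*
         * Worked example (illustrative only): with cfg->dcb_tc[] =
         * {0, 0, 0, 0, 1, 1, 2, 3}, the 3-bit fields packed above give
         * queue_mapping = (1 << 12) | (1 << 15) | (2 << 18) | (3 << 21)
         *               = 0x689000.
         */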
3746
3747         /* RTRPCS: DCB related */
3748         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3749
3750         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3751         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3752         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3753         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3754
3755         /* VFTA - enable all vlan filters */
3756         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3757                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3758         }
3759
3760         /* VFRE: pool enabling for receive - 16 or 32 */
3761         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3762                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3763
3764         /*
3765          * MPSAR - allow pools to read specific mac addresses
3766          * In this case, all pools should be able to read from mac addr 0
3767          */
3768         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3769         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3770
3771         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3772         for (i = 0; i < cfg->nb_pool_maps; i++) {
3773                 /* set vlan id in VF register and set the valid bit */
3774                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3775                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3776                 /*
3777                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3778                  * pools, we only need to use the first half of the register
3779                  * i.e. bits 0-31
3780                  */
3781                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3782         }
3783 }
3784
3785 /**
3786  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3787  * @dev: pointer to eth_dev structure
3788  * @dcb_config: pointer to ixgbe_dcb_config structure
3789  */
3790 static void
3791 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3792                        struct ixgbe_dcb_config *dcb_config)
3793 {
3794         uint32_t reg;
3795         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3796
3797         PMD_INIT_FUNC_TRACE();
3798         if (hw->mac.type != ixgbe_mac_82598EB) {
3799                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3800                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3801                 reg |= IXGBE_RTTDCS_ARBDIS;
3802                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3803
3804                 /* Enable DCB for Tx with 8 TCs */
3805                 if (dcb_config->num_tcs.pg_tcs == 8) {
3806                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3807                 } else {
3808                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3809                 }
3810                 if (dcb_config->vt_mode)
3811                         reg |= IXGBE_MTQC_VT_ENA;
3812                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3813
3814                 /* Enable the Tx desc arbiter */
3815                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3816                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3817                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3818
3819                 /* Enable Security TX Buffer IFG for DCB */
3820                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3821                 reg |= IXGBE_SECTX_DCB;
3822                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3823         }
3824 }
3825
3826 /**
3827  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3828  * @dev: pointer to rte_eth_dev structure
3829  * @dcb_config: pointer to ixgbe_dcb_config structure
3830  */
3831 static void
3832 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3833                         struct ixgbe_dcb_config *dcb_config)
3834 {
3835         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3836                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3837         struct ixgbe_hw *hw =
3838                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3839
3840         PMD_INIT_FUNC_TRACE();
3841         if (hw->mac.type != ixgbe_mac_82598EB)
3842                 /*PF VF Transmit Enable*/
3843                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3844                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3845
3846         /*Configure general DCB TX parameters*/
3847         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3848 }
3849
3850 static void
3851 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3852                         struct ixgbe_dcb_config *dcb_config)
3853 {
3854         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3855                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3856         struct ixgbe_dcb_tc_config *tc;
3857         uint8_t i, j;
3858
3859         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3860         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3861                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3862                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3863         } else {
3864                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3865                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3866         }
3867
3868         /* Initialize User Priority to Traffic Class mapping */
3869         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3870                 tc = &dcb_config->tc_config[j];
3871                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3872         }
3873
3874         /* User Priority to Traffic Class mapping */
3875         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3876                 j = vmdq_rx_conf->dcb_tc[i];
3877                 tc = &dcb_config->tc_config[j];
3878                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3879                                                 (uint8_t)(1 << i);
3880         }
3881 }
3882
3883 static void
3884 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3885                         struct ixgbe_dcb_config *dcb_config)
3886 {
3887         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3888                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3889         struct ixgbe_dcb_tc_config *tc;
3890         uint8_t i, j;
3891
3892         /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3893         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3894                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3895                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3896         } else {
3897                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3898                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3899         }
3900
3901         /* Initialize User Priority to Traffic Class mapping */
3902         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3903                 tc = &dcb_config->tc_config[j];
3904                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3905         }
3906
3907         /* User Priority to Traffic Class mapping */
3908         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3909                 j = vmdq_tx_conf->dcb_tc[i];
3910                 tc = &dcb_config->tc_config[j];
3911                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3912                                                 (uint8_t)(1 << i);
3913         }
3914 }
3915
3916 static void
3917 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3918                 struct ixgbe_dcb_config *dcb_config)
3919 {
3920         struct rte_eth_dcb_rx_conf *rx_conf =
3921                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3922         struct ixgbe_dcb_tc_config *tc;
3923         uint8_t i, j;
3924
3925         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3926         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3927
3928         /* Initialize User Priority to Traffic Class mapping */
3929         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3930                 tc = &dcb_config->tc_config[j];
3931                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3932         }
3933
3934         /* User Priority to Traffic Class mapping */
3935         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3936                 j = rx_conf->dcb_tc[i];
3937                 tc = &dcb_config->tc_config[j];
3938                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3939                                                 (uint8_t)(1 << i);
3940         }
3941 }
3942
3943 static void
3944 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3945                 struct ixgbe_dcb_config *dcb_config)
3946 {
3947         struct rte_eth_dcb_tx_conf *tx_conf =
3948                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3949         struct ixgbe_dcb_tc_config *tc;
3950         uint8_t i, j;
3951
3952         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3953         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3954
3955         /* Initialize User Priority to Traffic Class mapping */
3956         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3957                 tc = &dcb_config->tc_config[j];
3958                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3959         }
3960
3961         /* User Priority to Traffic Class mapping */
3962         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3963                 j = tx_conf->dcb_tc[i];
3964                 tc = &dcb_config->tc_config[j];
3965                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3966                                                 (uint8_t)(1 << i);
3967         }
3968 }
3969
3970 /**
3971  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3972  * @dev: pointer to eth_dev structure
3973  * @dcb_config: pointer to ixgbe_dcb_config structure
3974  */
3975 static void
3976 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3977                        struct ixgbe_dcb_config *dcb_config)
3978 {
3979         uint32_t reg;
3980         uint32_t vlanctrl;
3981         uint8_t i;
3982         uint32_t q;
3983         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3984
3985         PMD_INIT_FUNC_TRACE();
3986         /*
3987          * Disable the arbiter before changing parameters
3988          * (always enable recycle mode; WSP)
3989          */
3990         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3991         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3992
3993         if (hw->mac.type != ixgbe_mac_82598EB) {
3994                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3995                 if (dcb_config->num_tcs.pg_tcs == 4) {
3996                         if (dcb_config->vt_mode)
3997                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3998                                         IXGBE_MRQC_VMDQRT4TCEN;
3999                         else {
4000                                 /* Whether the mode is DCB or DCB_RSS, just
4001                                  * set MRQE to RSSXTCEN; RSS itself is
4002                                  * controlled by the RSS_FIELD bits.
4003                                  */
4004                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4005                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4006                                         IXGBE_MRQC_RTRSS4TCEN;
4007                         }
4008                 }
4009                 if (dcb_config->num_tcs.pg_tcs == 8) {
4010                         if (dcb_config->vt_mode)
4011                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4012                                         IXGBE_MRQC_VMDQRT8TCEN;
4013                         else {
4014                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4015                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4016                                         IXGBE_MRQC_RTRSS8TCEN;
4017                         }
4018                 }
4019
4020                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4021
4022                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4023                         /* Disable drop for all queues in VMDQ mode */
4024                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4025                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4026                                                 (IXGBE_QDE_WRITE |
4027                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4028                 } else {
4029                         /* Enable drop for all queues in SRIOV mode */
4030                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4031                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4032                                                 (IXGBE_QDE_WRITE |
4033                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4034                                                  IXGBE_QDE_ENABLE));
4035                 }
4036         }
4037
4038         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4039         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4040         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4041         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4042
4043         /* VFTA - enable all vlan filters */
4044         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4045                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4046         }
4047
4048         /*
4049          * Configure Rx packet plane (recycle mode; WSP) and
4050          * enable arbiter
4051          */
4052         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4053         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4054 }
4055
4056 static void
4057 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4058                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4059 {
4060         switch (hw->mac.type) {
4061         case ixgbe_mac_82598EB:
4062                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4063                 break;
4064         case ixgbe_mac_82599EB:
4065         case ixgbe_mac_X540:
4066         case ixgbe_mac_X550:
4067         case ixgbe_mac_X550EM_x:
4068         case ixgbe_mac_X550EM_a:
4069                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4070                                                   tsa, map);
4071                 break;
4072         default:
4073                 break;
4074         }
4075 }
4076
4077 static void
4078 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4079                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4080 {
4081         switch (hw->mac.type) {
4082         case ixgbe_mac_82598EB:
4083                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4084                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4085                 break;
4086         case ixgbe_mac_82599EB:
4087         case ixgbe_mac_X540:
4088         case ixgbe_mac_X550:
4089         case ixgbe_mac_X550EM_x:
4090         case ixgbe_mac_X550EM_a:
4091                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4092                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4093                 break;
4094         default:
4095                 break;
4096         }
4097 }
4098
4099 #define DCB_RX_CONFIG  1
4100 #define DCB_TX_CONFIG  1
4101 #define DCB_TX_PB      1024
4102 /**
4103  * ixgbe_dcb_hw_configure - Enable DCB and configure
4104  * general DCB in VT mode and non-VT mode parameters
4105  * @dev: pointer to rte_eth_dev structure
4106  * @dcb_config: pointer to ixgbe_dcb_config structure
4107  */
4108 static int
4109 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4110                         struct ixgbe_dcb_config *dcb_config)
4111 {
4112         int     ret = 0;
4113         uint8_t i, pfc_en, nb_tcs;
4114         uint16_t pbsize, rx_buffer_size;
4115         uint8_t config_dcb_rx = 0;
4116         uint8_t config_dcb_tx = 0;
4117         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4118         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4119         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4120         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4121         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4122         struct ixgbe_dcb_tc_config *tc;
4123         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4124                 RTE_ETHER_CRC_LEN;
4125         struct ixgbe_hw *hw =
4126                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4127         struct ixgbe_bw_conf *bw_conf =
4128                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4129
4130         switch (dev->data->dev_conf.rxmode.mq_mode) {
4131         case ETH_MQ_RX_VMDQ_DCB:
4132                 dcb_config->vt_mode = true;
4133                 if (hw->mac.type != ixgbe_mac_82598EB) {
4134                         config_dcb_rx = DCB_RX_CONFIG;
4135                         /*
4136                          * get DCB and VT RX configuration parameters
4137                          * from rte_eth_conf
4138                          */
4139                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4140                         /*Configure general VMDQ and DCB RX parameters*/
4141                         ixgbe_vmdq_dcb_configure(dev);
4142                 }
4143                 break;
4144         case ETH_MQ_RX_DCB:
4145         case ETH_MQ_RX_DCB_RSS:
4146                 dcb_config->vt_mode = false;
4147                 config_dcb_rx = DCB_RX_CONFIG;
4148         /* Get DCB RX configuration parameters from rte_eth_conf */
4149                 ixgbe_dcb_rx_config(dev, dcb_config);
4150                 /*Configure general DCB RX parameters*/
4151                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4152                 break;
4153         default:
4154                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4155                 break;
4156         }
4157         switch (dev->data->dev_conf.txmode.mq_mode) {
4158         case ETH_MQ_TX_VMDQ_DCB:
4159                 dcb_config->vt_mode = true;
4160                 config_dcb_tx = DCB_TX_CONFIG;
4161                 /* get DCB and VT TX configuration parameters
4162                  * from rte_eth_conf
4163                  */
4164                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4165                 /*Configure general VMDQ and DCB TX parameters*/
4166                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4167                 break;
4168
4169         case ETH_MQ_TX_DCB:
4170                 dcb_config->vt_mode = false;
4171                 config_dcb_tx = DCB_TX_CONFIG;
4172                 /*get DCB TX configuration parameters from rte_eth_conf*/
4173                 ixgbe_dcb_tx_config(dev, dcb_config);
4174                 /*Configure general DCB TX parameters*/
4175                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4176                 break;
4177         default:
4178                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4179                 break;
4180         }
4181
4182         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4183         /* Unpack map */
4184         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4185         if (nb_tcs == ETH_4_TCS) {
4186                 /* Avoid un-configured priority mapping to TC0 */
4187                 uint8_t j = 4;
4188                 uint8_t mask = 0xFF;
4189
4190                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4191                         mask = (uint8_t)(mask & (~(1 << map[i])));
4192                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4193                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4194                                 map[j++] = i;
4195                         mask >>= 1;
4196                 }
4197                 /* Re-configure 4 TCs BW */
4198                 for (i = 0; i < nb_tcs; i++) {
4199                         tc = &dcb_config->tc_config[i];
4200                         if (bw_conf->tc_num != nb_tcs)
4201                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4202                                         (uint8_t)(100 / nb_tcs);
4203                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4204                                                 (uint8_t)(100 / nb_tcs);
4205                 }
4206                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4207                         tc = &dcb_config->tc_config[i];
4208                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4209                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4210                 }
4211         } else {
4212                 /* Re-configure 8 TCs BW */
4213                 for (i = 0; i < nb_tcs; i++) {
4214                         tc = &dcb_config->tc_config[i];
4215                         if (bw_conf->tc_num != nb_tcs)
4216                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4217                                         (uint8_t)(100 / nb_tcs + (i & 1));
4218                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4219                                 (uint8_t)(100 / nb_tcs + (i & 1));
4220                 }
4221         }
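        /*
         * Illustrative note (editorial addition, not part of the original
         * driver): with 8 TCs and no user-supplied bandwidth table,
         * 100 / 8 = 12 and the (i & 1) term alternates 12/13 percent per TC
         * (4 * 12 + 4 * 13 = 100), so the shares still sum to 100%.
         * With 4 TCs each TC simply gets 25%.
         */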
4222
4223         switch (hw->mac.type) {
4224         case ixgbe_mac_X550:
4225         case ixgbe_mac_X550EM_x:
4226         case ixgbe_mac_X550EM_a:
4227                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4228                 break;
4229         default:
4230                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4231                 break;
4232         }
4233
4234         if (config_dcb_rx) {
4235                 /* Set RX buffer size */
4236                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4237                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4238
4239                 for (i = 0; i < nb_tcs; i++) {
4240                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4241                 }
4242                 /* zero alloc all unused TCs */
4243                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4244                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4245                 }
4246         }
4247         if (config_dcb_tx) {
4248                 /* Only support an equally distributed
4249                  *  Tx packet buffer strategy.
4250                  */
4251                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4252                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4253
4254                 for (i = 0; i < nb_tcs; i++) {
4255                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4256                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4257                 }
4258                 /* Clear unused TCs, if any, to zero buffer size*/
4259                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4260                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4261                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4262                 }
4263         }
4264
4265         /*Calculates traffic class credits*/
4266         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4267                                 IXGBE_DCB_TX_CONFIG);
4268         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4269                                 IXGBE_DCB_RX_CONFIG);
4270
4271         if (config_dcb_rx) {
4272                 /* Unpack CEE standard containers */
4273                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4274                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4275                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4276                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4277                 /* Configure PG(ETS) RX */
4278                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4279         }
4280
4281         if (config_dcb_tx) {
4282                 /* Unpack CEE standard containers */
4283                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4284                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4285                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4286                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4287                 /* Configure PG(ETS) TX */
4288                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4289         }
4290
4291         /*Configure queue statistics registers*/
4292         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4293
4294         /* Check if the PFC is supported */
4295         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4296                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4297                 for (i = 0; i < nb_tcs; i++) {
4298                         /*
4299                          * If the TC count is 8, the default high_water is 48
4300                          * and the default low_water is 16.
4301                          */
4302                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4303                         hw->fc.low_water[i] = pbsize / 4;
4304                         /* Enable pfc for this TC */
4305                         tc = &dcb_config->tc_config[i];
4306                         tc->pfc = ixgbe_dcb_pfc_enabled;
4307                 }
4308                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4309                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4310                         pfc_en &= 0x0F;
4311                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4312         }
4313
4314         return ret;
4315 }
4316
4317 /**
4318  * ixgbe_configure_dcb - Configure DCB  Hardware
4319  * @dev: pointer to rte_eth_dev
4320  */
4321 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4322 {
4323         struct ixgbe_dcb_config *dcb_cfg =
4324                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4325         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4326
4327         PMD_INIT_FUNC_TRACE();
4328
4329         /* check support mq_mode for DCB */
4330         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4331             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4332             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4333                 return;
4334
4335         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4336                 return;
4337
4338         /** Configure DCB hardware **/
4339         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4340 }
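/*
 * Illustrative sketch (editorial addition, not part of the original driver):
 * the DCB path above is only taken when the application requests a DCB
 * mq_mode before starting the port, roughly along these lines:
 *
 *   struct rte_eth_conf conf = { 0 };
 *   int up;
 *
 *   conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
 *   conf.txmode.mq_mode = ETH_MQ_TX_DCB;
 *   conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
 *   conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;
 *   for (up = 0; up < ETH_DCB_NUM_USER_PRIORITIES; up++) {
 *           conf.rx_adv_conf.dcb_rx_conf.dcb_tc[up] = up % 4;
 *           conf.tx_adv_conf.dcb_tx_conf.dcb_tc[up] = up % 4;
 *   }
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */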
4341
4342 /*
4343  * VMDq is only supported on 10 GbE NICs.
4344  */
4345 static void
4346 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4347 {
4348         struct rte_eth_vmdq_rx_conf *cfg;
4349         struct ixgbe_hw *hw;
4350         enum rte_eth_nb_pools num_pools;
4351         uint32_t mrqc, vt_ctl, vlanctrl;
4352         uint32_t vmolr = 0;
4353         int i;
4354
4355         PMD_INIT_FUNC_TRACE();
4356         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4357         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4358         num_pools = cfg->nb_queue_pools;
4359
4360         ixgbe_rss_disable(dev);
4361
4362         /* MRQC: enable vmdq */
4363         mrqc = IXGBE_MRQC_VMDQEN;
4364         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4365
4366         /* PFVTCTL: turn on virtualisation and set the default pool */
4367         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4368         if (cfg->enable_default_pool)
4369                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4370         else
4371                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4372
4373         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4374
4375         for (i = 0; i < (int)num_pools; i++) {
4376                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4377                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4378         }
4379
4380         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4381         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4382         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4383         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4384
4385         /* VFTA - enable all vlan filters */
4386         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4387                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4388
4389         /* VFRE: pool enabling for receive - 64 */
4390         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4391         if (num_pools == ETH_64_POOLS)
4392                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4393
4394         /*
4395          * MPSAR - allow pools to read specific mac addresses
4396          * In this case, all pools should be able to read from mac addr 0
4397          */
4398         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4399         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4400
4401         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4402         for (i = 0; i < cfg->nb_pool_maps; i++) {
4403                 /* set vlan id in VF register and set the valid bit */
4404                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4405                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4406                 /*
4407                  * Put the allowed pools in the VLVFB registers. With up to 64
4408                  * pools the mask spans two 32-bit registers, so write the
4409                  * half that actually holds the configured pools.
4410                  */
4411                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4412                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4413                                         (cfg->pool_map[i].pools & UINT32_MAX));
4414                 else
4415                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4416                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4417
4418         }
4419
4420         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4421         if (cfg->enable_loop_back) {
4422                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4423                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4424                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4425         }
4426
4427         IXGBE_WRITE_FLUSH(hw);
4428 }
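/*
 * Illustrative sketch (editorial addition, not part of the original driver):
 * the VMDq-only RX setup above corresponds to an application configuration
 * roughly like:
 *
 *   struct rte_eth_conf conf = { 0 };
 *   struct rte_eth_vmdq_rx_conf *vmdq = &conf.rx_adv_conf.vmdq_rx_conf;
 *
 *   conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *   vmdq->nb_queue_pools = ETH_64_POOLS;
 *   vmdq->enable_default_pool = 0;
 *   vmdq->nb_pool_maps = 1;
 *   vmdq->pool_map[0].vlan_id = 100;        // steer VLAN 100 ...
 *   vmdq->pool_map[0].pools = 1ULL << 3;    // ... to pool 3
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */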
4429
4430 /*
4431  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4432  * @hw: pointer to hardware structure
4433  */
4434 static void
4435 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4436 {
4437         uint32_t reg;
4438         uint32_t q;
4439
4440         PMD_INIT_FUNC_TRACE();
4441         /*PF VF Transmit Enable*/
4442         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4443         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4444
4445         /* Disable the Tx desc arbiter so that MTQC can be changed */
4446         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4447         reg |= IXGBE_RTTDCS_ARBDIS;
4448         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4449
4450         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4451         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4452
4453         /* Disable drop for all queues */
4454         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4455                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4456                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4457
4458         /* Enable the Tx desc arbiter */
4459         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4460         reg &= ~IXGBE_RTTDCS_ARBDIS;
4461         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4462
4463         IXGBE_WRITE_FLUSH(hw);
4464 }
4465
4466 static int __rte_cold
4467 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4468 {
4469         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4470         uint64_t dma_addr;
4471         unsigned int i;
4472
4473         /* Initialize software ring entries */
4474         for (i = 0; i < rxq->nb_rx_desc; i++) {
4475                 volatile union ixgbe_adv_rx_desc *rxd;
4476                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4477
4478                 if (mbuf == NULL) {
4479                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4480                                      (unsigned) rxq->queue_id);
4481                         return -ENOMEM;
4482                 }
4483
4484                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4485                 mbuf->port = rxq->port_id;
4486
4487                 dma_addr =
4488                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4489                 rxd = &rxq->rx_ring[i];
4490                 rxd->read.hdr_addr = 0;
4491                 rxd->read.pkt_addr = dma_addr;
4492                 rxe[i].mbuf = mbuf;
4493         }
4494
4495         return 0;
4496 }
4497
4498 static int
4499 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4500 {
4501         struct ixgbe_hw *hw;
4502         uint32_t mrqc;
4503
4504         ixgbe_rss_configure(dev);
4505
4506         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4507
4508         /* MRQC: enable VF RSS */
4509         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4510         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4511         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4512         case ETH_64_POOLS:
4513                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4514                 break;
4515
4516         case ETH_32_POOLS:
4517                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4518                 break;
4519
4520         default:
4521                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4522                 return -EINVAL;
4523         }
4524
4525         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4526
4527         return 0;
4528 }
4529
4530 static int
4531 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4532 {
4533         struct ixgbe_hw *hw =
4534                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4535
4536         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4537         case ETH_64_POOLS:
4538                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4539                         IXGBE_MRQC_VMDQEN);
4540                 break;
4541
4542         case ETH_32_POOLS:
4543                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4544                         IXGBE_MRQC_VMDQRT4TCEN);
4545                 break;
4546
4547         case ETH_16_POOLS:
4548                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4549                         IXGBE_MRQC_VMDQRT8TCEN);
4550                 break;
4551         default:
4552                 PMD_INIT_LOG(ERR,
4553                         "invalid pool number in IOV mode");
4554                 break;
4555         }
4556         return 0;
4557 }
4558
4559 static int
4560 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4561 {
4562         struct ixgbe_hw *hw =
4563                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4564
4565         if (hw->mac.type == ixgbe_mac_82598EB)
4566                 return 0;
4567
4568         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4569                 /*
4570                  * SRIOV inactive scheme
4571                  * any DCB/RSS w/o VMDq multi-queue setting
4572                  */
4573                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4574                 case ETH_MQ_RX_RSS:
4575                 case ETH_MQ_RX_DCB_RSS:
4576                 case ETH_MQ_RX_VMDQ_RSS:
4577                         ixgbe_rss_configure(dev);
4578                         break;
4579
4580                 case ETH_MQ_RX_VMDQ_DCB:
4581                         ixgbe_vmdq_dcb_configure(dev);
4582                         break;
4583
4584                 case ETH_MQ_RX_VMDQ_ONLY:
4585                         ixgbe_vmdq_rx_hw_configure(dev);
4586                         break;
4587
4588                 case ETH_MQ_RX_NONE:
4589                 default:
4590                         /* if mq_mode is none, disable RSS mode */
4591                         ixgbe_rss_disable(dev);
4592                         break;
4593                 }
4594         } else {
4595                 /* SRIOV active scheme
4596                  * Support RSS together with SRIOV.
4597                  */
4598                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4599                 case ETH_MQ_RX_RSS:
4600                 case ETH_MQ_RX_VMDQ_RSS:
4601                         ixgbe_config_vf_rss(dev);
4602                         break;
4603                 case ETH_MQ_RX_VMDQ_DCB:
4604                 case ETH_MQ_RX_DCB:
4605                 /* In SRIOV, the configuration is the same as VMDq case */
4606                         ixgbe_vmdq_dcb_configure(dev);
4607                         break;
4608                 /* DCB/RSS together with SRIOV is not supported */
4609                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4610                 case ETH_MQ_RX_DCB_RSS:
4611                         PMD_INIT_LOG(ERR,
4612                                 "Could not support DCB/RSS with VMDq & SRIOV");
4613                         return -1;
4614                 default:
4615                         ixgbe_config_vf_default(dev);
4616                         break;
4617                 }
4618         }
4619
4620         return 0;
4621 }
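/*
 * Illustrative sketch (editorial addition, not part of the original driver):
 * the most common way to reach the RSS branch above is to configure the port
 * with
 *
 *   struct rte_eth_conf conf = { 0 };
 *
 *   conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
 *   conf.rx_adv_conf.rss_conf.rss_key = NULL;   // use the default key
 *   conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP;
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * With SR-IOV active the same mq_mode is handled by ixgbe_config_vf_rss()
 * instead.
 */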
4622
4623 static int
4624 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4625 {
4626         struct ixgbe_hw *hw =
4627                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4628         uint32_t mtqc;
4629         uint32_t rttdcs;
4630
4631         if (hw->mac.type == ixgbe_mac_82598EB)
4632                 return 0;
4633
4634         /* disable arbiter before setting MTQC */
4635         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4636         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4637         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4638
4639         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4640                 /*
4641                  * SRIOV inactive scheme
4642                  * any DCB w/o VMDq multi-queue setting
4643                  */
4644                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4645                         ixgbe_vmdq_tx_hw_configure(hw);
4646                 else {
4647                         mtqc = IXGBE_MTQC_64Q_1PB;
4648                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4649                 }
4650         } else {
4651                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4652
4653                 /*
4654                  * SRIOV active scheme
4655                  * FIXME if support DCB together with VMDq & SRIOV
4656                  */
4657                 case ETH_64_POOLS:
4658                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4659                         break;
4660                 case ETH_32_POOLS:
4661                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4662                         break;
4663                 case ETH_16_POOLS:
4664                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4665                                 IXGBE_MTQC_8TC_8TQ;
4666                         break;
4667                 default:
4668                         mtqc = IXGBE_MTQC_64Q_1PB;
4669                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4670                 }
4671                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4672         }
4673
4674         /* re-enable arbiter */
4675         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4676         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4677
4678         return 0;
4679 }
4680
4681 /**
4682  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4683  *
4684  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4685  * spec rev. 3.0 chapter 8.2.3.8.13.
4686  *
4687  * @pool Memory pool of the Rx queue
4688  */
4689 static inline uint32_t
4690 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4691 {
4692         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4693
4694         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4695         uint16_t maxdesc =
4696                 RTE_IPV4_MAX_PKT_LEN /
4697                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4698
4699         if (maxdesc >= 16)
4700                 return IXGBE_RSCCTL_MAXDESC_16;
4701         else if (maxdesc >= 8)
4702                 return IXGBE_RSCCTL_MAXDESC_8;
4703         else if (maxdesc >= 4)
4704                 return IXGBE_RSCCTL_MAXDESC_4;
4705         else
4706                 return IXGBE_RSCCTL_MAXDESC_1;
4707 }
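/*
 * Illustrative note (editorial addition, not part of the original driver):
 * assuming a pool created with RTE_MBUF_DEFAULT_BUF_SIZE (128-byte headroom
 * plus 2048 usable bytes), the computation above gives
 *
 *   maxdesc = 65535 / 2048 = 31  ->  IXGBE_RSCCTL_MAXDESC_16
 *
 * i.e. a single RSC aggregation may span at most 16 descriptors.
 */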
4708
4709 /**
4710  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4711  * interrupt
4712  *
4713  * (Taken from FreeBSD tree)
4714  * (yes this is all very magic and confusing :)
4715  *
4716  * @dev port handle
4717  * @entry the register array entry
4718  * @vector the MSIX vector for this queue
4719  * @type RX/TX/MISC
4720  */
4721 static void
4722 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4723 {
4724         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4725         u32 ivar, index;
4726
4727         vector |= IXGBE_IVAR_ALLOC_VAL;
4728
4729         switch (hw->mac.type) {
4730
4731         case ixgbe_mac_82598EB:
4732                 if (type == -1)
4733                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4734                 else
4735                         entry += (type * 64);
4736                 index = (entry >> 2) & 0x1F;
4737                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4738                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4739                 ivar |= (vector << (8 * (entry & 0x3)));
4740                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4741                 break;
4742
4743         case ixgbe_mac_82599EB:
4744         case ixgbe_mac_X540:
4745                 if (type == -1) { /* MISC IVAR */
4746                         index = (entry & 1) * 8;
4747                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4748                         ivar &= ~(0xFF << index);
4749                         ivar |= (vector << index);
4750                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4751                 } else {        /* RX/TX IVARS */
4752                         index = (16 * (entry & 1)) + (8 * type);
4753                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4754                         ivar &= ~(0xFF << index);
4755                         ivar |= (vector << index);
4756                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4757                 }
4758
4759                 break;
4760
4761         default:
4762                 break;
4763         }
4764 }
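/*
 * Illustrative note (editorial addition, not part of the original driver):
 * on 82599/X540 each IVAR register holds four 8-bit entries. For an RX queue
 * (type 0) with entry 5 and vector 1 the code above computes
 *
 *   index = 16 * (5 & 1) + 8 * 0 = 16
 *
 * and programs (1 | IXGBE_IVAR_ALLOC_VAL) into bits 23:16, i.e. byte 2, of
 * IVAR(5 >> 1) = IVAR(2).
 */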
4765
4766 void __rte_cold
4767 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4768 {
4769         uint16_t i, rx_using_sse;
4770         struct ixgbe_adapter *adapter = dev->data->dev_private;
4771
4772         /*
4773          * In order to allow Vector Rx there are a few configuration
4774          * conditions to be met and Rx Bulk Allocation should be allowed.
4775          */
4776         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4777             !adapter->rx_bulk_alloc_allowed ||
4778                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4779                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4780                                     "preconditions",
4781                              dev->data->port_id);
4782
4783                 adapter->rx_vec_allowed = false;
4784         }
4785
4786         /*
4787          * Initialize the appropriate LRO callback.
4788          *
4789          * If all queues satisfy the bulk allocation preconditions
4790          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4791          * Otherwise use a single allocation version.
4792          */
4793         if (dev->data->lro) {
4794                 if (adapter->rx_bulk_alloc_allowed) {
4795                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4796                                            "allocation version");
4797                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4798                 } else {
4799                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4800                                            "allocation version");
4801                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4802                 }
4803         } else if (dev->data->scattered_rx) {
4804                 /*
4805                  * Set the non-LRO scattered callback: there are Vector and
4806                  * single allocation versions.
4807                  */
4808                 if (adapter->rx_vec_allowed) {
4809                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4810                                             "callback (port=%d).",
4811                                      dev->data->port_id);
4812
4813                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4814                 } else if (adapter->rx_bulk_alloc_allowed) {
4815                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4816                                            "allocation callback (port=%d).",
4817                                      dev->data->port_id);
4818                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4819                 } else {
4820                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4821                                             "single allocation) "
4822                                             "Scattered Rx callback "
4823                                             "(port=%d).",
4824                                      dev->data->port_id);
4825
4826                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4827                 }
4828         /*
4829          * Below we set "simple" callbacks according to port/queues parameters.
4830          * If parameters allow we are going to choose between the following
4831          * callbacks:
4832          *    - Vector
4833          *    - Bulk Allocation
4834          *    - Single buffer allocation (the simplest one)
4835          */
4836         } else if (adapter->rx_vec_allowed) {
4837                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4838                                     "burst size no less than %d (port=%d).",
4839                              RTE_IXGBE_DESCS_PER_LOOP,
4840                              dev->data->port_id);
4841
4842                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4843         } else if (adapter->rx_bulk_alloc_allowed) {
4844                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4845                                     "satisfied. Rx Burst Bulk Alloc function "
4846                                     "will be used on port=%d.",
4847                              dev->data->port_id);
4848
4849                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4850         } else {
4851                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4852                                     "satisfied, or Scattered Rx is requested "
4853                                     "(port=%d).",
4854                              dev->data->port_id);
4855
4856                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4857         }
4858
4859         /* Propagate information about RX function choice through all queues. */
4860
4861         rx_using_sse =
4862                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4863                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4864
4865         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4866                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4867
4868                 rxq->rx_using_sse = rx_using_sse;
4869 #ifdef RTE_LIB_SECURITY
4870                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4871                                 DEV_RX_OFFLOAD_SECURITY);
4872 #endif
4873         }
4874 }
4875
4876 /**
4877  * ixgbe_set_rsc - configure RSC related port HW registers
4878  *
4879  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4880  * of 82599 Spec (x540 configuration is virtually the same).
4881  *
4882  * @dev port handle
4883  *
4884  * Returns 0 in case of success or a non-zero error code
4885  */
4886 static int
4887 ixgbe_set_rsc(struct rte_eth_dev *dev)
4888 {
4889         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4890         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4891         struct rte_eth_dev_info dev_info = { 0 };
4892         bool rsc_capable = false;
4893         uint16_t i;
4894         uint32_t rdrxctl;
4895         uint32_t rfctl;
4896
4897         /* Sanity check */
4898         dev->dev_ops->dev_infos_get(dev, &dev_info);
4899         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4900                 rsc_capable = true;
4901
4902         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4903                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4904                                    "support it");
4905                 return -EINVAL;
4906         }
4907
4908         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4909
4910         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4911              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4912                 /*
4913                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4914                  * 3.0 RSC configuration requires HW CRC stripping being
4915                  * enabled. If user requested both HW CRC stripping off
4916                  * and RSC on - return an error.
4917                  */
4918                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4919                                     "is disabled");
4920                 return -EINVAL;
4921         }
4922
4923         /* RFCTL configuration  */
4924         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4925         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4926                 /*
4927                  * Since NFS packet coalescing is not supported, clear
4928                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4929                  * enabled.
4930                  */
4931                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4932                            IXGBE_RFCTL_NFSR_DIS);
4933         else
4934                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4935         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4936
4937         /* If LRO hasn't been requested - we are done here. */
4938         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4939                 return 0;
4940
4941         /* Set RDRXCTL.RSCACKC bit */
4942         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4943         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4944         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4945
4946         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4947         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4948                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4949                 uint32_t srrctl =
4950                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4951                 uint32_t rscctl =
4952                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4953                 uint32_t psrtype =
4954                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4955                 uint32_t eitr =
4956                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4957
4958                 /*
4959                  * ixgbe PMD doesn't support header-split at the moment.
4960                  *
4961                  * Following chapter 4.6.7.2.1 of the 82599/x540
4962                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4963                  * should be configured even if header split is not
4964                  * enabled. We configure it to 128 bytes following the
4965                  * recommendation in the spec.
4966                  */
4967                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4968                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4969                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4970
4971                 /*
4972                  * TODO: Consider setting the Receive Descriptor Minimum
4973                  * Threshold Size for the RSC case. This is not an obviously
4974                  * beneficial option, but it may be worth considering...
4975                  */
4976
4977                 rscctl |= IXGBE_RSCCTL_RSCEN;
4978                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4979                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4980
4981                 /*
4982                  * RSC: Set ITR interval corresponding to 2K ints/s.
4983                  *
4984                  * Full-sized RSC aggregations for a 10Gb/s link will
4985                  * arrive at a rate of about 20K aggregations/s.
4986                  *
4987                  * A 2K ints/s rate will cause only 10% of the
4988                  * aggregations to be closed due to interrupt timer
4989                  * expiration in the wire-speed streaming case.
4990                  *
4991                  * For a sparse streaming case this setting will yield
4992                  * at most 500us latency for a single RSC aggregation.
4993                  */
4994                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4995                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4996                 eitr |= IXGBE_EITR_CNT_WDIS;
4997
4998                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4999                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5000                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5001                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5002
5003                 /*
5004                  * RSC requires the mapping of the queue to the
5005                  * interrupt vector.
5006                  */
5007                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5008         }
5009
5010         dev->data->lro = 1;
5011
5012         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5013
5014         return 0;
5015 }
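/*
 * Illustrative sketch (editorial addition, not part of the original driver):
 * LRO/RSC is enabled from the application side by requesting the offload
 * before configuring the port, e.g.
 *
 *   conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
 *
 * As checked above, DEV_RX_OFFLOAD_KEEP_CRC must not be requested at the
 * same time, since RSC requires HW CRC stripping.
 */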
5016
5017 /*
5018  * Initializes Receive Unit.
5019  */
5020 int __rte_cold
5021 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5022 {
5023         struct ixgbe_hw     *hw;
5024         struct ixgbe_rx_queue *rxq;
5025         uint64_t bus_addr;
5026         uint32_t rxctrl;
5027         uint32_t fctrl;
5028         uint32_t hlreg0;
5029         uint32_t maxfrs;
5030         uint32_t srrctl;
5031         uint32_t rdrxctl;
5032         uint32_t rxcsum;
5033         uint16_t buf_size;
5034         uint16_t i;
5035         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5036         int rc;
5037
5038         PMD_INIT_FUNC_TRACE();
5039         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5040
5041         /*
5042          * Make sure receives are disabled while setting
5043          * up the RX context (registers, descriptor rings, etc.).
5044          */
5045         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5046         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5047
5048         /* Enable receipt of broadcast frames */
5049         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5050         fctrl |= IXGBE_FCTRL_BAM;
5051         fctrl |= IXGBE_FCTRL_DPF;
5052         fctrl |= IXGBE_FCTRL_PMCF;
5053         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5054
5055         /*
5056          * Configure CRC stripping, if any.
5057          */
5058         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5059         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5060                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5061         else
5062                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5063
5064         /*
5065          * Configure jumbo frame support, if any.
5066          */
5067         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5068                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5069                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5070                 maxfrs &= 0x0000FFFF;
5071                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5072                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5073         } else
5074                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5075
5076         /*
5077          * If loopback mode is configured, set LPBK bit.
5078          */
5079         if (dev->data->dev_conf.lpbk_mode != 0) {
5080                 rc = ixgbe_check_supported_loopback_mode(dev);
5081                 if (rc < 0) {
5082                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5083                         return rc;
5084                 }
5085                 hlreg0 |= IXGBE_HLREG0_LPBK;
5086         } else {
5087                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5088         }
5089
5090         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5091
5092         /*
5093          * Assume no header split and no VLAN strip support
5094          * on any Rx queue first.
5095          */
5096         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5097         /* Setup RX queues */
5098         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5099                 rxq = dev->data->rx_queues[i];
5100
5101                 /*
5102                  * Reset crc_len in case it was changed after queue setup by a
5103                  * call to configure.
5104                  */
5105                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5106                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5107                 else
5108                         rxq->crc_len = 0;
5109
5110                 /* Setup the Base and Length of the Rx Descriptor Rings */
5111                 bus_addr = rxq->rx_ring_phys_addr;
5112                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5113                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5114                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5115                                 (uint32_t)(bus_addr >> 32));
5116                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5117                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5118                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5119                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5120
5121                 /* Configure the SRRCTL register */
5122                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5123
5124                 /* Set if packets are dropped when no descriptors available */
5125                 if (rxq->drop_en)
5126                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5127
5128                 /*
5129                  * Configure the RX buffer size in the BSIZEPACKET field of
5130                  * the SRRCTL register of the queue.
5131                  * The value is in 1 KB resolution. Valid values can be from
5132                  * 1 KB to 16 KB.
5133                  */
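                /*
                 * Illustrative example (actual values depend on the mempool):
                 * a 2176-byte data room minus the default 128-byte
                 * RTE_PKTMBUF_HEADROOM leaves 2048 bytes, which BSIZEPACKET
                 * encodes as 2048 >> 10 = 2, i.e. a 2 KB Rx buffer.
                 */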
5134                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5135                         RTE_PKTMBUF_HEADROOM);
5136                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5137                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5138
5139                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5140
5141                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5142                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5143
5144                 /* Account for two VLAN tags (QinQ) when checking if Rx scatter is needed */
5145                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5146                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5147                         dev->data->scattered_rx = 1;
5148                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5149                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5150         }
5151
5152         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5153                 dev->data->scattered_rx = 1;
5154
5155         /*
5156          * Device configured with multiple RX queues.
5157          */
5158         ixgbe_dev_mq_rx_configure(dev);
5159
5160         /*
5161          * Setup the Checksum Register.
5162          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5163          * Enable IP/L4 checksum computation by hardware if requested to do so.
5164          */
5165         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5166         rxcsum |= IXGBE_RXCSUM_PCSD;
5167         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5168                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5169         else
5170                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5171
5172         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5173
5174         if (hw->mac.type == ixgbe_mac_82599EB ||
5175             hw->mac.type == ixgbe_mac_X540) {
5176                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5177                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5178                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5179                 else
5180                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5181                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5182                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5183         }
5184
5185         rc = ixgbe_set_rsc(dev);
5186         if (rc)
5187                 return rc;
5188
5189         ixgbe_set_rx_function(dev);
5190
5191         return 0;
5192 }
5193
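/*
 * Illustrative application-side sketch (not part of the driver): it shows the
 * ethdev calls that eventually reach ixgbe_dev_rx_init() above. The queue and
 * descriptor counts, offload selection and function name below are
 * hypothetical placeholders; a real application would query device
 * capabilities and pick its own values.
 */
static __rte_unused int
example_port_rx_setup(uint16_t port_id, struct rte_mempool *mb_pool)
{
        struct rte_eth_conf port_conf = {
                .rxmode = {
                        /* Exercise the jumbo-frame and checksum paths above */
                        .offloads = DEV_RX_OFFLOAD_JUMBO_FRAME |
                                    DEV_RX_OFFLOAD_CHECKSUM,
                        .max_rx_pkt_len = 9000,
                },
        };
        int ret;

        /* One Rx queue and one Tx queue, default thresholds */
        ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
        if (ret != 0)
                return ret;

        ret = rte_eth_rx_queue_setup(port_id, 0, 512,
                        rte_eth_dev_socket_id(port_id), NULL, mb_pool);
        if (ret != 0)
                return ret;

        ret = rte_eth_tx_queue_setup(port_id, 0, 512,
                        rte_eth_dev_socket_id(port_id), NULL);
        if (ret != 0)
                return ret;

        /* rte_eth_dev_start() invokes the PMD Rx/Tx init and start paths */
        return rte_eth_dev_start(port_id);
}
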
5194 /*
5195  * Initializes Transmit Unit.
5196  */
5197 void __rte_cold
5198 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5199 {
5200         struct ixgbe_hw     *hw;
5201         struct ixgbe_tx_queue *txq;
5202         uint64_t bus_addr;
5203         uint32_t hlreg0;
5204         uint32_t txctrl;
5205         uint16_t i;
5206
5207         PMD_INIT_FUNC_TRACE();
5208         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5209
5210         /* Enable TX CRC (checksum offload requirement) and hw padding
5211          * (TSO requirement)
5212          */
5213         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5214         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5215         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5216
5217         /* Setup the Base and Length of the Tx Descriptor Rings */
5218         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5219                 txq = dev->data->tx_queues[i];
5220
5221                 bus_addr = txq->tx_ring_phys_addr;
5222                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5223                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5224                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5225                                 (uint32_t)(bus_addr >> 32));
5226                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5227                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5228                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5229                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5230                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5231
5232                 /*
5233                  * Disable Tx Head Writeback RO bit, since this hoses
5234                  * bookkeeping if things aren't delivered in order.
5235                  */
5236                 switch (hw->mac.type) {
5237                 case ixgbe_mac_82598EB:
5238                         txctrl = IXGBE_READ_REG(hw,
5239                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5240                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5241                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5242                                         txctrl);
5243                         break;
5244
5245                 case ixgbe_mac_82599EB:
5246                 case ixgbe_mac_X540:
5247                 case ixgbe_mac_X550:
5248                 case ixgbe_mac_X550EM_x:
5249                 case ixgbe_mac_X550EM_a:
5250                 default:
5251                         txctrl = IXGBE_READ_REG(hw,
5252                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5253                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5254                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5255                                         txctrl);
5256                         break;
5257                 }
5258         }
5259
5260         /* Device configured with multiple TX queues. */
5261         ixgbe_dev_mq_tx_configure(dev);
5262 }
5263
5264 /*
5265  * Check if requested loopback mode is supported
5266  */
5267 int
5268 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5269 {
5270         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5271
5272         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5273                 if (hw->mac.type == ixgbe_mac_82599EB ||
5274                      hw->mac.type == ixgbe_mac_X540 ||
5275                      hw->mac.type == ixgbe_mac_X550 ||
5276                      hw->mac.type == ixgbe_mac_X550EM_x ||
5277                      hw->mac.type == ixgbe_mac_X550EM_a)
5278                         return 0;
5279
5280         return -ENOTSUP;
5281 }
5282
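/*
 * Illustrative note: an application requests Tx->Rx loopback by setting
 * dev_conf.lpbk_mode = IXGBE_LPBK_TX_RX before rte_eth_dev_configure();
 * ixgbe_dev_rx_init() then validates the request through the check above.
 */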
5283 /*
5284  * Set up link for 82599 loopback mode Tx->Rx.
5285  */
5286 static inline void __rte_cold
5287 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5288 {
5289         PMD_INIT_FUNC_TRACE();
5290
5291         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5292                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5293                                 IXGBE_SUCCESS) {
5294                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5295                         /* ignore error */
5296                         return;
5297                 }
5298         }
5299
5300         /* Restart link */
5301         IXGBE_WRITE_REG(hw,
5302                         IXGBE_AUTOC,
5303                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5304         ixgbe_reset_pipeline_82599(hw);
5305
5306         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5307         msec_delay(50);
5308 }
5309
5310
5311 /*
5312  * Start Transmit and Receive Units.
5313  */
5314 int __rte_cold
5315 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5316 {
5317         struct ixgbe_hw     *hw;
5318         struct ixgbe_tx_queue *txq;
5319         struct ixgbe_rx_queue *rxq;
5320         uint32_t txdctl;
5321         uint32_t dmatxctl;
5322         uint32_t rxctrl;
5323         uint16_t i;
5324         int ret = 0;
5325
5326         PMD_INIT_FUNC_TRACE();
5327         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5328
5329         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5330                 txq = dev->data->tx_queues[i];
5331                 /* Setup Transmit Threshold Registers */
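                /*
                 * TXDCTL packs the prefetch, host and write-back thresholds
                 * into bits 6:0, 14:8 and 22:16 respectively (82599
                 * datasheet); the 0x7F masks and shifts below follow that
                 * layout.
                 */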
5332                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5333                 txdctl |= txq->pthresh & 0x7F;
5334                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5335                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5336                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5337         }
5338
5339         if (hw->mac.type != ixgbe_mac_82598EB) {
5340                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5341                 dmatxctl |= IXGBE_DMATXCTL_TE;
5342                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5343         }
5344
5345         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5346                 txq = dev->data->tx_queues[i];
5347                 if (!txq->tx_deferred_start) {
5348                         ret = ixgbe_dev_tx_queue_start(dev, i);
5349                         if (ret < 0)
5350                                 return ret;
5351                 }
5352         }
5353
5354         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5355                 rxq = dev->data->rx_queues[i];
5356                 if (!rxq->rx_deferred_start) {
5357                         ret = ixgbe_dev_rx_queue_start(dev, i);
5358                         if (ret < 0)
5359                                 return ret;
5360                 }
5361         }
5362
5363         /* Enable Receive engine */
5364         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5365         if (hw->mac.type == ixgbe_mac_82598EB)
5366                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5367         rxctrl |= IXGBE_RXCTRL_RXEN;
5368         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5369
5370         /* If loopback mode is enabled, set up the link accordingly */
5371         if (dev->data->dev_conf.lpbk_mode != 0) {
5372                 if (hw->mac.type == ixgbe_mac_82599EB)
5373                         ixgbe_setup_loopback_link_82599(hw);
5374                 else if (hw->mac.type == ixgbe_mac_X540 ||
5375                      hw->mac.type == ixgbe_mac_X550 ||
5376                      hw->mac.type == ixgbe_mac_X550EM_x ||
5377                      hw->mac.type == ixgbe_mac_X550EM_a)
5378                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5379         }
5380
5381 #ifdef RTE_LIB_SECURITY
5382         if ((dev->data->dev_conf.rxmode.offloads &
5383                         DEV_RX_OFFLOAD_SECURITY) ||
5384                 (dev->data->dev_conf.txmode.offloads &
5385                         DEV_TX_OFFLOAD_SECURITY)) {
5386                 ret = ixgbe_crypto_enable_ipsec(dev);
5387                 if (ret != 0) {
5388                         PMD_DRV_LOG(ERR,
5389                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5390                                     ret);
5391                         return ret;
5392                 }
5393         }
5394 #endif
5395
5396         return 0;
5397 }
5398
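/*
 * Illustrative sketch (hypothetical application code): queues flagged as
 * deferred start are skipped by ixgbe_dev_rxtx_start() above and must be
 * started explicitly through the ethdev API once the application is ready.
 */
static __rte_unused int
example_deferred_queue_start(uint16_t port_id, uint16_t queue_id)
{
        int ret;

        /* Start an Rx queue that was set up with rx_deferred_start = 1 */
        ret = rte_eth_dev_rx_queue_start(port_id, queue_id);
        if (ret != 0)
                return ret;

        /* Likewise for a Tx queue set up with tx_deferred_start = 1 */
        return rte_eth_dev_tx_queue_start(port_id, queue_id);
}
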
5399 /*
5400  * Start Receive Units for specified queue.
5401  */
5402 int __rte_cold
5403 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5404 {
5405         struct ixgbe_hw     *hw;
5406         struct ixgbe_rx_queue *rxq;
5407         uint32_t rxdctl;
5408         int poll_ms;
5409
5410         PMD_INIT_FUNC_TRACE();
5411         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5412
5413         rxq = dev->data->rx_queues[rx_queue_id];
5414
5415         /* Allocate buffers for descriptor rings */
5416         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5417                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5418                              rx_queue_id);
5419                 return -1;
5420         }
5421         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5422         rxdctl |= IXGBE_RXDCTL_ENABLE;
5423         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5424
5425         /* Wait until RX Enable ready */
5426         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5427         do {
5428                 rte_delay_ms(1);
5429                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5430         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5431         if (!poll_ms)
5432                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
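        /*
         * Ensure the mbuf and descriptor writes are visible to the device
         * before the tail pointer update below makes them available.
         */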
5433         rte_wmb();
5434         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5435         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5436         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5437
5438         return 0;
5439 }
5440
5441 /*
5442  * Stop Receive Units for specified queue.
5443  */
5444 int __rte_cold
5445 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5446 {
5447         struct ixgbe_hw     *hw;
5448         struct ixgbe_adapter *adapter = dev->data->dev_private;
5449         struct ixgbe_rx_queue *rxq;
5450         uint32_t rxdctl;
5451         int poll_ms;
5452
5453         PMD_INIT_FUNC_TRACE();
5454         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5455
5456         rxq = dev->data->rx_queues[rx_queue_id];
5457
5458         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5459         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5460         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5461
5462         /* Wait until RX Enable bit clear */
5463         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5464         do {
5465                 rte_delay_ms(1);
5466                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5467         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5468         if (!poll_ms)
5469                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5470
5471         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5472
5473         ixgbe_rx_queue_release_mbufs(rxq);
5474         ixgbe_reset_rx_queue(adapter, rxq);
5475         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5476
5477         return 0;
5478 }
5479
5480
5481 /*
5482  * Start Transmit Units for specified queue.
5483  */
5484 int __rte_cold
5485 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5486 {
5487         struct ixgbe_hw     *hw;
5488         struct ixgbe_tx_queue *txq;
5489         uint32_t txdctl;
5490         int poll_ms;
5491
5492         PMD_INIT_FUNC_TRACE();
5493         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5494
5495         txq = dev->data->tx_queues[tx_queue_id];
5496         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5497         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5498         txdctl |= IXGBE_TXDCTL_ENABLE;
5499         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5500
5501         /* Wait until TX Enable ready */
5502         if (hw->mac.type == ixgbe_mac_82599EB) {
5503                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5504                 do {
5505                         rte_delay_ms(1);
5506                         txdctl = IXGBE_READ_REG(hw,
5507                                 IXGBE_TXDCTL(txq->reg_idx));
5508                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5509                 if (!poll_ms)
5510                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5511                                 tx_queue_id);
5512         }
5513         rte_wmb();
5514         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5515         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5516
5517         return 0;
5518 }
5519
5520 /*
5521  * Stop Transmit Units for specified queue.
5522  */
5523 int __rte_cold
5524 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5525 {
5526         struct ixgbe_hw     *hw;
5527         struct ixgbe_tx_queue *txq;
5528         uint32_t txdctl;
5529         uint32_t txtdh, txtdt;
5530         int poll_ms;
5531
5532         PMD_INIT_FUNC_TRACE();
5533         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5534
5535         txq = dev->data->tx_queues[tx_queue_id];
5536
5537         /* Wait until TX queue is empty */
5538         if (hw->mac.type == ixgbe_mac_82599EB) {
5539                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5540                 do {
5541                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5542                         txtdh = IXGBE_READ_REG(hw,
5543                                                IXGBE_TDH(txq->reg_idx));
5544                         txtdt = IXGBE_READ_REG(hw,
5545                                                IXGBE_TDT(txq->reg_idx));
5546                 } while (--poll_ms && (txtdh != txtdt));
5547                 if (!poll_ms)
5548                         PMD_INIT_LOG(ERR,
5549                                 "Tx Queue %d is not empty when stopping.",
5550                                 tx_queue_id);
5551         }
5552
5553         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5554         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5555         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5556
5557         /* Wait until TX Enable bit clear */
5558         if (hw->mac.type == ixgbe_mac_82599EB) {
5559                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5560                 do {
5561                         rte_delay_ms(1);
5562                         txdctl = IXGBE_READ_REG(hw,
5563                                                 IXGBE_TXDCTL(txq->reg_idx));
5564                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5565                 if (!poll_ms)
5566                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5567                                 tx_queue_id);
5568         }
5569
5570         if (txq->ops != NULL) {
5571                 txq->ops->release_mbufs(txq);
5572                 txq->ops->reset(txq);
5573         }
5574         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5575
5576         return 0;
5577 }
5578
5579 void
5580 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5581         struct rte_eth_rxq_info *qinfo)
5582 {
5583         struct ixgbe_rx_queue *rxq;
5584
5585         rxq = dev->data->rx_queues[queue_id];
5586
5587         qinfo->mp = rxq->mb_pool;
5588         qinfo->scattered_rx = dev->data->scattered_rx;
5589         qinfo->nb_desc = rxq->nb_rx_desc;
5590
5591         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5592         qinfo->conf.rx_drop_en = rxq->drop_en;
5593         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5594         qinfo->conf.offloads = rxq->offloads;
5595 }
5596
5597 void
5598 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5599         struct rte_eth_txq_info *qinfo)
5600 {
5601         struct ixgbe_tx_queue *txq;
5602
5603         txq = dev->data->tx_queues[queue_id];
5604
5605         qinfo->nb_desc = txq->nb_tx_desc;
5606
5607         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5608         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5609         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5610
5611         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5612         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5613         qinfo->conf.offloads = txq->offloads;
5614         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5615 }
5616
5617 /*
5618  * [VF] Initializes Receive Unit.
5619  */
5620 int __rte_cold
5621 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5622 {
5623         struct ixgbe_hw     *hw;
5624         struct ixgbe_rx_queue *rxq;
5625         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5626         uint64_t bus_addr;
5627         uint32_t srrctl, psrtype = 0;
5628         uint16_t buf_size;
5629         uint16_t i;
5630         int ret;
5631
5632         PMD_INIT_FUNC_TRACE();
5633         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5634
5635         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5636                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5637                         "it should be a power of 2");
5638                 return -1;
5639         }
5640
5641         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5642                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5643                         "it should be equal to or less than %d",
5644                         hw->mac.max_rx_queues);
5645                 return -1;
5646         }
5647
5648         /*
5649          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5650          * disables VF receipt of packets if the PF MTU is > 1500.
5651          * This is done to deal with an 82599 limitation that requires
5652          * the PF and all VFs to share the same MTU.
5653          * The PF driver then re-enables VF receipt of packets when the
5654          * VF driver issues an IXGBE_VF_SET_LPE request.
5655          * In the meantime, the VF device cannot be used, even if the VF
5656          * driver and the Guest VM network stack are ready to accept packets
5657          * with a size up to the PF MTU.
5658          * As a workaround for this PF behaviour, always call
5659          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5660          * VF packet reception works in all cases.
5661          */
5662         if (ixgbevf_rlpml_set_vf(hw,
5663             (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) {
5664                 PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
5665                              dev->data->dev_conf.rxmode.max_rx_pkt_len);
5666                 return -EINVAL;
5667         }
5668
5669         /*
5670          * Start by assuming no header split and no VLAN stripping
5671          * on any Rx queue.
5672          */
5673         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5674         /* Setup RX queues */
5675         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5676                 rxq = dev->data->rx_queues[i];
5677
5678                 /* Allocate buffers for descriptor rings */
5679                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5680                 if (ret)
5681                         return ret;
5682
5683                 /* Setup the Base and Length of the Rx Descriptor Rings */
5684                 bus_addr = rxq->rx_ring_phys_addr;
5685
5686                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5687                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5688                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5689                                 (uint32_t)(bus_addr >> 32));
5690                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5691                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5692                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5693                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5694
5695
5696                 /* Configure the SRRCTL register */
5697                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5698
5699                 /* Drop packets when no Rx descriptors are available, if enabled */
5700                 if (rxq->drop_en)
5701                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5702
5703                 /*
5704                  * Configure the RX buffer size in the BSIZEPACKET field of
5705                  * the SRRCTL register of the queue.
5706                  * The value is in 1 KB resolution. Valid values can be from
5707                  * 1 KB to 16 KB.
5708                  */
5709                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5710                         RTE_PKTMBUF_HEADROOM);
5711                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5712                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5713
5714                 /*
5715                  * VF modification to write virtual function SRRCTL register
5716                  */
5717                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5718
5719                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5720                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5721
5722                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5723                     /* Account for two VLAN tags (QinQ) */
5724                     (rxmode->max_rx_pkt_len +
5725                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5726                         if (!dev->data->scattered_rx)
5727                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5728                         dev->data->scattered_rx = 1;
5729                 }
5730
5731                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5732                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5733         }
5734
5735         /* Set RQPL (RSS queues per pool) according to the number of Rx queues */
5736         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5737                 IXGBE_PSRTYPE_RQPL_SHIFT;
5738         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5739
5740         ixgbe_set_rx_function(dev);
5741
5742         return 0;
5743 }
5744
5745 /*
5746  * [VF] Initializes Transmit Unit.
5747  */
5748 void __rte_cold
5749 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5750 {
5751         struct ixgbe_hw     *hw;
5752         struct ixgbe_tx_queue *txq;
5753         uint64_t bus_addr;
5754         uint32_t txctrl;
5755         uint16_t i;
5756
5757         PMD_INIT_FUNC_TRACE();
5758         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5759
5760         /* Setup the Base and Length of the Tx Descriptor Rings */
5761         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5762                 txq = dev->data->tx_queues[i];
5763                 bus_addr = txq->tx_ring_phys_addr;
5764                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5765                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5766                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5767                                 (uint32_t)(bus_addr >> 32));
5768                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5769                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5770                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5771                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5772                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5773
5774                 /*
5775                  * Disable Tx Head Writeback RO bit, since this hoses
5776                  * bookkeeping if things aren't delivered in order.
5777                  */
5778                 txctrl = IXGBE_READ_REG(hw,
5779                                 IXGBE_VFDCA_TXCTRL(i));
5780                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5781                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5782                                 txctrl);
5783         }
5784 }
5785
5786 /*
5787  * [VF] Start Transmit and Receive Units.
5788  */
5789 void __rte_cold
5790 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5791 {
5792         struct ixgbe_hw     *hw;
5793         struct ixgbe_tx_queue *txq;
5794         struct ixgbe_rx_queue *rxq;
5795         uint32_t txdctl;
5796         uint32_t rxdctl;
5797         uint16_t i;
5798         int poll_ms;
5799
5800         PMD_INIT_FUNC_TRACE();
5801         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5802
5803         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5804                 txq = dev->data->tx_queues[i];
5805                 /* Setup Transmit Threshold Registers */
5806                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5807                 txdctl |= txq->pthresh & 0x7F;
5808                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5809                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5810                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5811         }
5812
5813         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5814
5815                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5816                 txdctl |= IXGBE_TXDCTL_ENABLE;
5817                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5818
5819                 poll_ms = 10;
5820                 /* Wait until TX Enable ready */
5821                 do {
5822                         rte_delay_ms(1);
5823                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5824                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5825                 if (!poll_ms)
5826                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5827         }
5828         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5829
5830                 rxq = dev->data->rx_queues[i];
5831
5832                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5833                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5834                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5835
5836                 /* Wait until RX Enable ready */
5837                 poll_ms = 10;
5838                 do {
5839                         rte_delay_ms(1);
5840                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5841                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5842                 if (!poll_ms)
5843                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5844                 rte_wmb();
5845                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5846
5847         }
5848 }
5849
5850 int
5851 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5852                     const struct rte_flow_action_rss *in)
5853 {
5854         if (in->key_len > RTE_DIM(out->key) ||
5855             in->queue_num > RTE_DIM(out->queue))
5856                 return -EINVAL;
5857         out->conf = (struct rte_flow_action_rss){
5858                 .func = in->func,
5859                 .level = in->level,
5860                 .types = in->types,
5861                 .key_len = in->key_len,
5862                 .queue_num = in->queue_num,
5863                 .key = memcpy(out->key, in->key, in->key_len),
5864                 .queue = memcpy(out->queue, in->queue,
5865                                 sizeof(*in->queue) * in->queue_num),
5866         };
5867         return 0;
5868 }
5869
5870 int
5871 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5872                       const struct rte_flow_action_rss *with)
5873 {
5874         return (comp->func == with->func &&
5875                 comp->level == with->level &&
5876                 comp->types == with->types &&
5877                 comp->key_len == with->key_len &&
5878                 comp->queue_num == with->queue_num &&
5879                 !memcmp(comp->key, with->key, with->key_len) &&
5880                 !memcmp(comp->queue, with->queue,
5881                         sizeof(*with->queue) * with->queue_num));
5882 }
5883
5884 int
5885 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5886                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5887 {
5888         struct ixgbe_hw *hw;
5889         uint32_t reta;
5890         uint16_t i;
5891         uint16_t j;
5892         uint16_t sp_reta_size;
5893         uint32_t reta_reg;
5894         struct rte_eth_rss_conf rss_conf = {
5895                 .rss_key = conf->conf.key_len ?
5896                         (void *)(uintptr_t)conf->conf.key : NULL,
5897                 .rss_key_len = conf->conf.key_len,
5898                 .rss_hf = conf->conf.types,
5899         };
5900         struct ixgbe_filter_info *filter_info =
5901                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5902
5903         PMD_INIT_FUNC_TRACE();
5904         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5905
5906         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5907
5908         if (!add) {
5909                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5910                                           &conf->conf)) {
5911                         ixgbe_rss_disable(dev);
5912                         memset(&filter_info->rss_info, 0,
5913                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5914                         return 0;
5915                 }
5916                 return -EINVAL;
5917         }
5918
5919         if (filter_info->rss_info.conf.queue_num)
5920                 return -EINVAL;
5921         /* Fill in redirection table
5922          * The byte-swap is needed because NIC registers are in
5923          * little-endian order.
5924          */
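        /*
         * Illustrative example: with conf->conf.queue_num == 2 and queues
         * {0, 1}, each 32-bit RETA register is packed with the four one-byte
         * entries 0, 1, 0, 1 and written once every fourth iteration.
         */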
5925         reta = 0;
5926         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5927                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5928
5929                 if (j == conf->conf.queue_num)
5930                         j = 0;
5931                 reta = (reta << 8) | conf->conf.queue[j];
5932                 if ((i & 3) == 3)
5933                         IXGBE_WRITE_REG(hw, reta_reg,
5934                                         rte_bswap32(reta));
5935         }
5936
5937         /* Configure the RSS key and the RSS protocols used to compute
5938          * the RSS hash of input packets.
5939          */
5940         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5941                 ixgbe_rss_disable(dev);
5942                 return 0;
5943         }
5944         if (rss_conf.rss_key == NULL)
5945                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5946         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5947
5948         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5949                 return -EINVAL;
5950
5951         return 0;
5952 }
5953
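/*
 * Illustrative sketch (hypothetical application code): the RSS helpers above
 * are reached through the rte_flow API. A minimal rule spreading all IP
 * traffic over two hypothetical queues could look as follows; a real
 * application would validate the rule and inspect rte_flow_error on failure.
 */
static __rte_unused struct rte_flow *
example_rss_flow_create(uint16_t port_id)
{
        static const uint16_t queues[] = { 0, 1 };
        const struct rte_flow_attr attr = { .ingress = 1 };
        const struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        const struct rte_flow_action_rss rss = {
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
                .types = ETH_RSS_IP,
                .queue_num = RTE_DIM(queues),
                .queue = queues,
        };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
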
5954 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5955 #if defined(RTE_ARCH_PPC_64)
5956 int
5957 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5958 {
5959         return -1;
5960 }
5961
5962 uint16_t
5963 ixgbe_recv_pkts_vec(
5964         void __rte_unused *rx_queue,
5965         struct rte_mbuf __rte_unused **rx_pkts,
5966         uint16_t __rte_unused nb_pkts)
5967 {
5968         return 0;
5969 }
5970
5971 uint16_t
5972 ixgbe_recv_scattered_pkts_vec(
5973         void __rte_unused *rx_queue,
5974         struct rte_mbuf __rte_unused **rx_pkts,
5975         uint16_t __rte_unused nb_pkts)
5976 {
5977         return 0;
5978 }
5979
5980 int
5981 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5982 {
5983         return -1;
5984 }
5985
5986 uint16_t
5987 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5988                 struct rte_mbuf __rte_unused **tx_pkts,
5989                 uint16_t __rte_unused nb_pkts)
5990 {
5991         return 0;
5992 }
5993
5994 int
5995 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5996 {
5997         return -1;
5998 }
5999
6000 void
6001 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6002 {
6003         return;
6004 }
6005 #endif