1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask to indicate which bits are required for building the TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (                  \
63                 PKT_TX_OUTER_IPV6 |              \
64                 PKT_TX_OUTER_IPV4 |              \
65                 PKT_TX_IPV6 |                    \
66                 PKT_TX_IPV4 |                    \
67                 PKT_TX_VLAN_PKT |                \
68                 PKT_TX_IP_CKSUM |                \
69                 PKT_TX_L4_MASK |                 \
70                 PKT_TX_TCP_SEG |                 \
71                 PKT_TX_MACSEC |                  \
72                 PKT_TX_OUTER_IP_CKSUM |          \
73                 PKT_TX_SEC_OFFLOAD |             \
74                 IXGBE_TX_IEEE1588_TMST)
75
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
78
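/*
 * PKT_TX_OFFLOAD_MASK covers every mbuf TX offload flag, so XOR-ing it with
 * IXGBE_TX_OFFLOAD_MASK yields exactly the flags this PMD cannot handle.
 * ixgbe_prep_pkts() rejects such requests with a check of the form:
 *   if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { rte_errno = ENOTSUP; ... }
 */
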
79 #if 1
80 #define RTE_PMD_USE_PREFETCH
81 #endif
82
83 #ifdef RTE_PMD_USE_PREFETCH
84 /*
85  * Prefetch a cache line into all cache levels.
86  */
87 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
88 #else
89 #define rte_ixgbe_prefetch(p)   do {} while (0)
90 #endif
91
92 /*********************************************************************
93  *
94  *  TX functions
95  *
96  **********************************************************************/
97
98 /*
99  * Check for descriptors with their DD bit set and free mbufs.
100  * Return the total number of buffers freed.
101  */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
104 {
105         struct ixgbe_tx_entry *txep;
106         uint32_t status;
107         int i, nb_free = 0;
108         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
109
110         /* check DD bit on threshold descriptor */
111         status = txq->tx_ring[txq->tx_next_dd].wb.status;
112         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
113                 return 0;
114
115         /*
116          * first buffer to free from S/W ring is at index
117          * tx_next_dd - (tx_rs_thresh-1)
118          */
119         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
120
121         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122                 /* free buffers one at a time */
123                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
124                 txep->mbuf = NULL;
125
126                 if (unlikely(m == NULL))
127                         continue;
128
129                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130                     (nb_free > 0 && m->pool != free[0]->pool)) {
131                         rte_mempool_put_bulk(free[0]->pool,
132                                              (void **)free, nb_free);
133                         nb_free = 0;
134                 }
135
136                 free[nb_free++] = m;
137         }
138
139         if (nb_free > 0)
140                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
141
142         /* buffers were freed, update counters */
143         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
145         if (txq->tx_next_dd >= txq->nb_tx_desc)
146                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
147
148         return txq->tx_rs_thresh;
149 }
150
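/*
 * Freed mbufs are returned to their mempool in batches: consecutive mbufs
 * from the same pool accumulate in free[] and are flushed with a single
 * rte_mempool_put_bulk() call. For example, with tx_rs_thresh = 32 and all
 * mbufs coming from one pool, the loop ends with one bulk put of 32 buffers
 * instead of 32 individual puts.
 */
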
151 /* Populate 4 descriptors with data from 4 mbufs */
152 static inline void
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
154 {
155         uint64_t buf_dma_addr;
156         uint32_t pkt_len;
157         int i;
158
159         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161                 pkt_len = (*pkts)->data_len;
162
163                 /* write data to descriptor */
164                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
165
166                 txdp->read.cmd_type_len =
167                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
168
169                 txdp->read.olinfo_status =
170                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
171
172                 rte_prefetch0(&(*pkts)->pool);
173         }
174 }
175
176 /* Populate 1 descriptor with data from 1 mbuf */
177 static inline void
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
179 {
180         uint64_t buf_dma_addr;
181         uint32_t pkt_len;
182
183         buf_dma_addr = rte_mbuf_data_iova(*pkts);
184         pkt_len = (*pkts)->data_len;
185
186         /* write data to descriptor */
187         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188         txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190         txdp->read.olinfo_status =
191                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192         rte_prefetch0(&(*pkts)->pool);
193 }
194
195 /*
196  * Fill H/W descriptor ring with mbuf data.
197  * Copy mbuf pointers to the S/W ring.
198  */
199 static inline void
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
201                       uint16_t nb_pkts)
202 {
203         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205         const int N_PER_LOOP = 4;
206         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207         int mainpart, leftover;
208         int i, j;
209
210         /*
211          * Process most of the packets in chunks of N pkts.  Any
212          * leftover packets will get processed one at a time.
213          */
214         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
216         for (i = 0; i < mainpart; i += N_PER_LOOP) {
217                 /* Copy N mbuf pointers to the S/W ring */
218                 for (j = 0; j < N_PER_LOOP; ++j) {
219                         (txep + i + j)->mbuf = *(pkts + i + j);
220                 }
221                 tx4(txdp + i, pkts + i);
222         }
223
224         if (unlikely(leftover > 0)) {
225                 for (i = 0; i < leftover; ++i) {
226                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227                         tx1(txdp + mainpart + i, pkts + mainpart + i);
228                 }
229         }
230 }
231
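/*
 * Example of the split above: for nb_pkts = 10, mainpart = 8 and leftover = 2,
 * so two tx4() calls fill descriptors 0-7 and two tx1() calls fill 8 and 9.
 */
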
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
234              uint16_t nb_pkts)
235 {
236         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
238         uint16_t n = 0;
239
240         /*
241          * Begin scanning the H/W ring for done descriptors when the
242          * number of available descriptors drops below tx_free_thresh.  For
243          * each done descriptor, free the associated buffer.
244          */
245         if (txq->nb_tx_free < txq->tx_free_thresh)
246                 ixgbe_tx_free_bufs(txq);
247
248         /* Only use descriptors that are available */
249         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250         if (unlikely(nb_pkts == 0))
251                 return 0;
252
253         /* Use exactly nb_pkts descriptors */
254         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
255
256         /*
257          * At this point, we know there are enough descriptors in the
258          * ring to transmit all the packets.  This assumes that each
259          * mbuf contains a single segment, and that no new offloads
260          * are expected, which would require a new context descriptor.
261          */
262
263         /*
264          * See if we're going to wrap-around. If so, handle the top
265          * of the descriptor ring first, then do the bottom.  If not,
266          * the processing looks just like the "bottom" part anyway...
267          */
268         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
271
272                 /*
273                  * We know that the last descriptor in the ring will need to
274                  * have its RS bit set because tx_rs_thresh has to be
275                  * a divisor of the ring size
276                  */
277                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
280
281                 txq->tx_tail = 0;
282         }
283
284         /* Fill H/W descriptor ring with mbuf data */
285         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
287
288         /*
289          * Determine if RS bit should be set
290          * This is what we actually want:
291          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292          * but instead of subtracting 1 and doing >=, we can just do
293          * greater than without subtracting.
294          */
295         if (txq->tx_tail > txq->tx_next_rs) {
296                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
299                                                 txq->tx_rs_thresh);
300                 if (txq->tx_next_rs >= txq->nb_tx_desc)
301                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302         }
303
304         /*
305          * Check for wrap-around. This would only happen if we used
306          * up to the last descriptor in the ring, no more, no less.
307          */
308         if (txq->tx_tail >= txq->nb_tx_desc)
309                 txq->tx_tail = 0;
310
311         /* update tail pointer */
312         rte_wmb();
313         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
314
315         return nb_pkts;
316 }
317
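/*
 * Worked example of the wrap/RS handling above (illustrative figures,
 * assuming nb_tx_desc = 128, tx_rs_thresh = 32, tx_tail = 120 and
 * tx_next_rs = 127): a burst of 16 packets first fills descriptors 120-127,
 * sets RS on descriptor 127 and resets tx_next_rs to 31; tx_tail wraps to 0
 * and the remaining 8 packets fill descriptors 0-7, leaving tx_tail = 8.
 */
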
318 uint16_t
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
320                        uint16_t nb_pkts)
321 {
322         uint16_t nb_tx;
323
324         /* Transmit the whole burst at once if it fits within TX_MAX_BURST pkts */
325         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
327
328         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
329         nb_tx = 0;
330         while (nb_pkts) {
331                 uint16_t ret, n;
332
333                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335                 nb_tx = (uint16_t)(nb_tx + ret);
336                 nb_pkts = (uint16_t)(nb_pkts - ret);
337                 if (ret < n)
338                         break;
339         }
340
341         return nb_tx;
342 }
343
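/*
 * For example, with RTE_PMD_IXGBE_TX_MAX_BURST of 32, a burst of 100 packets
 * is handed to tx_xmit_pkts() in chunks of 32, 32, 32 and 4; the loop stops
 * early if a chunk is only partially accepted (ring full).
 */
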
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN_PKT) {
388                 tx_offload_mask.vlan_tci |= ~0;
389         }
390
391         /* check if TCP segmentation is required for this packet */
392         if (ol_flags & PKT_TX_TCP_SEG) {
393                 /* implies IP cksum in IPv4 */
394                 if (ol_flags & PKT_TX_IP_CKSUM)
395                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
398                 else
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
402
403                 tx_offload_mask.l2_len |= ~0;
404                 tx_offload_mask.l3_len |= ~0;
405                 tx_offload_mask.l4_len |= ~0;
406                 tx_offload_mask.tso_segsz |= ~0;
407                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409         } else { /* no TSO, check if hardware checksum is needed */
410                 if (ol_flags & PKT_TX_IP_CKSUM) {
411                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                 }
415
416                 switch (ol_flags & PKT_TX_L4_MASK) {
417                 case PKT_TX_UDP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
430                         tx_offload_mask.l2_len |= ~0;
431                         tx_offload_mask.l3_len |= ~0;
432                         break;
433                 case PKT_TX_SCTP_CKSUM:
434                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
438                         tx_offload_mask.l2_len |= ~0;
439                         tx_offload_mask.l3_len |= ~0;
440                         break;
441                 default:
442                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
444                         break;
445                 }
446         }
447
448         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449                 tx_offload_mask.outer_l2_len |= ~0;
450                 tx_offload_mask.outer_l3_len |= ~0;
451                 tx_offload_mask.l2_len |= ~0;
452                 seqnum_seed |= tx_offload.outer_l3_len
453                                << IXGBE_ADVTXD_OUTER_IPLEN;
454                 seqnum_seed |= tx_offload.l2_len
455                                << IXGBE_ADVTXD_TUNNEL_LEN;
456         }
457 #ifdef RTE_LIB_SECURITY
458         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459                 union ixgbe_crypto_tx_desc_md *md =
460                                 (union ixgbe_crypto_tx_desc_md *)mdata;
461                 seqnum_seed |=
462                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463                 type_tucmd_mlhl |= md->enc ?
464                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
466                 type_tucmd_mlhl |=
467                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468                 tx_offload_mask.sa_idx |= ~0;
469                 tx_offload_mask.sec_pad_len |= ~0;
470         }
471 #endif
472
473         txq->ctx_cache[ctx_idx].flags = ol_flags;
474         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
475                 tx_offload_mask.data[0] & tx_offload.data[0];
476         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
477                 tx_offload_mask.data[1] & tx_offload.data[1];
478         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
479
480         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481         vlan_macip_lens = tx_offload.l3_len;
482         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484                                     IXGBE_ADVTXD_MACLEN_SHIFT);
485         else
486                 vlan_macip_lens |= (tx_offload.l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
491         ctx_txd->seqnum_seed     = seqnum_seed;
492 }
493
494 /*
495  * Check which hardware context can be used. Use the existing match
496  * or create a new context descriptor.
497  */
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500                    union ixgbe_tx_offload tx_offload)
501 {
502         /* If it matches the currently used context */
503         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506                      & tx_offload.data[0])) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509                      & tx_offload.data[1]))))
510                 return txq->ctx_curr;
511
512         /* Check whether it matches the other cached context */
513         txq->ctx_curr ^= 1;
514         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517                      & tx_offload.data[0])) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520                      & tx_offload.data[1]))))
521                 return txq->ctx_curr;
522
523         /* Mismatch: a new context descriptor must be built */
524         return IXGBE_CTX_NUM;
525 }
526
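/*
 * The queue caches IXGBE_CTX_NUM (two) context descriptors, so two
 * interleaved flows with different offload settings (e.g. one VLAN flow and
 * one TSO flow) can alternate without rewriting a context descriptor per
 * packet. A third distinct offload combination makes this function return
 * IXGBE_CTX_NUM, which tells the caller to rebuild the slot selected by
 * ctx_curr via ixgbe_set_xmit_ctx().
 */
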
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
529 {
530         uint32_t tmp = 0;
531
532         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534         if (ol_flags & PKT_TX_IP_CKSUM)
535                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
536         if (ol_flags & PKT_TX_TCP_SEG)
537                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
538         return tmp;
539 }
540
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
543 {
544         uint32_t cmdtype = 0;
545
546         if (ol_flags & PKT_TX_VLAN_PKT)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548         if (ol_flags & PKT_TX_TCP_SEG)
549                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552         if (ol_flags & PKT_TX_MACSEC)
553                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
554         return cmdtype;
555 }
556
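/*
 * For example, a packet carrying PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM gets
 * IXGBE_ADVTXD_POPTS_IXSM | IXGBE_ADVTXD_POPTS_TXSM in olinfo_status, while
 * PKT_TX_VLAN_PKT only contributes IXGBE_ADVTXD_DCMD_VLE to cmd_type_len.
 */
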
557 /* Default RS bit threshold values */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH   32
560 #endif
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
563 #endif
564
565 /* Reset transmit descriptors after they have been used */
566 static inline int
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
568 {
569         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572         uint16_t nb_tx_desc = txq->nb_tx_desc;
573         uint16_t desc_to_clean_to;
574         uint16_t nb_tx_to_clean;
575         uint32_t status;
576
577         /* Determine the last descriptor needing to be cleaned */
578         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579         if (desc_to_clean_to >= nb_tx_desc)
580                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
581
582         /* Check to make sure the last descriptor to clean is done */
583         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584         status = txr[desc_to_clean_to].wb.status;
585         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
586                 PMD_TX_LOG(DEBUG,
587                            "TX descriptor %4u is not done "
588                            "(port=%d queue=%d)",
589                            desc_to_clean_to,
590                            txq->port_id, txq->queue_id);
591                 /* Failed to clean any descriptors, better luck next time */
592                 return -(1);
593         }
594
595         /* Figure out how many descriptors will be cleaned */
596         if (last_desc_cleaned > desc_to_clean_to)
597                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
598                                                         desc_to_clean_to);
599         else
600                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
601                                                 last_desc_cleaned);
602
603         PMD_TX_LOG(DEBUG,
604                    "Cleaning %4u TX descriptors: %4u to %4u "
605                    "(port=%d queue=%d)",
606                    nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607                    txq->port_id, txq->queue_id);
608
609         /*
610          * The last descriptor to clean is done, so that means all the
611          * descriptors from the last descriptor that was cleaned
612          * up to the last descriptor with the RS bit set
613          * are done. Only reset the threshold descriptor.
614          */
615         txr[desc_to_clean_to].wb.status = 0;
616
617         /* Update the txq to reflect the last descriptor that was cleaned */
618         txq->last_desc_cleaned = desc_to_clean_to;
619         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
620
621         /* No Error */
622         return 0;
623 }
624
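/*
 * Worked example (illustrative figures, assuming nb_tx_desc = 512,
 * tx_rs_thresh = 32 and single-segment packets so that last_id does not move
 * the target): with last_desc_cleaned = 500, desc_to_clean_to wraps to
 * 532 - 512 = 20. If that descriptor has its DD bit set,
 * nb_tx_to_clean = (512 - 500) + 20 = 32 descriptors are reclaimed and
 * nb_tx_free grows by the same amount.
 */
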
625 uint16_t
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
627                 uint16_t nb_pkts)
628 {
629         struct ixgbe_tx_queue *txq;
630         struct ixgbe_tx_entry *sw_ring;
631         struct ixgbe_tx_entry *txe, *txn;
632         volatile union ixgbe_adv_tx_desc *txr;
633         volatile union ixgbe_adv_tx_desc *txd, *txp;
634         struct rte_mbuf     *tx_pkt;
635         struct rte_mbuf     *m_seg;
636         uint64_t buf_dma_addr;
637         uint32_t olinfo_status;
638         uint32_t cmd_type_len;
639         uint32_t pkt_len;
640         uint16_t slen;
641         uint64_t ol_flags;
642         uint16_t tx_id;
643         uint16_t tx_last;
644         uint16_t nb_tx;
645         uint16_t nb_used;
646         uint64_t tx_ol_req;
647         uint32_t ctx = 0;
648         uint32_t new_ctx;
649         union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIB_SECURITY
651         uint8_t use_ipsec;
652 #endif
653
654         tx_offload.data[0] = 0;
655         tx_offload.data[1] = 0;
656         txq = tx_queue;
657         sw_ring = txq->sw_ring;
658         txr     = txq->tx_ring;
659         tx_id   = txq->tx_tail;
660         txe = &sw_ring[tx_id];
661         txp = NULL;
662
663         /* Determine if the descriptor ring needs to be cleaned. */
664         if (txq->nb_tx_free < txq->tx_free_thresh)
665                 ixgbe_xmit_cleanup(txq);
666
667         rte_prefetch0(&txe->mbuf->pool);
668
669         /* TX loop */
670         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
671                 new_ctx = 0;
672                 tx_pkt = *tx_pkts++;
673                 pkt_len = tx_pkt->pkt_len;
674
675                 /*
676                  * Determine how many (if any) context descriptors
677                  * are needed for offload functionality.
678                  */
679                 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIB_SECURITY
681                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
682 #endif
683
684                 /* If hardware offload required */
685                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
686                 if (tx_ol_req) {
687                         tx_offload.l2_len = tx_pkt->l2_len;
688                         tx_offload.l3_len = tx_pkt->l3_len;
689                         tx_offload.l4_len = tx_pkt->l4_len;
690                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
691                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
692                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIB_SECURITY
695                         if (use_ipsec) {
696                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697                                         (union ixgbe_crypto_tx_desc_md *)
698                                                 rte_security_dynfield(tx_pkt);
699                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
701                         }
702 #endif
703
704                         /* Decide whether a new context must be built or an existing one reused. */
705                         ctx = what_advctx_update(txq, tx_ol_req,
706                                 tx_offload);
707                         /* Only allocate a context descriptor if required */
708                         new_ctx = (ctx == IXGBE_CTX_NUM);
709                         ctx = txq->ctx_curr;
710                 }
711
712                 /*
713                  * Keep track of how many descriptors are used this loop
714                  * This will always be the number of segments + the number of
715                  * Context descriptors required to transmit the packet
716                  */
717                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
718
719                 if (txp != NULL &&
720                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721                         /* set RS on the previous packet in the burst */
722                         txp->read.cmd_type_len |=
723                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
724
725                 /*
726                  * The number of descriptors that must be allocated for a
727                  * packet is the number of segments of that packet, plus 1
728                  * Context Descriptor for the hardware offload, if any.
729                  * Determine the last TX descriptor to allocate in the TX ring
730                  * for the packet, starting from the current position (tx_id)
731                  * in the ring.
732                  */
733                 tx_last = (uint16_t) (tx_id + nb_used - 1);
734
735                 /* Circular ring */
736                 if (tx_last >= txq->nb_tx_desc)
737                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
738
739                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740                            " tx_first=%u tx_last=%u",
741                            (unsigned) txq->port_id,
742                            (unsigned) txq->queue_id,
743                            (unsigned) pkt_len,
744                            (unsigned) tx_id,
745                            (unsigned) tx_last);
746
747                 /*
748                  * Make sure there are enough TX descriptors available to
749                  * transmit the entire packet.
750                  * nb_used better be less than or equal to txq->tx_rs_thresh
751                  */
752                 if (nb_used > txq->nb_tx_free) {
753                         PMD_TX_LOG(DEBUG,
754                                    "Not enough free TX descriptors "
755                                    "nb_used=%4u nb_free=%4u "
756                                    "(port=%d queue=%d)",
757                                    nb_used, txq->nb_tx_free,
758                                    txq->port_id, txq->queue_id);
759
760                         if (ixgbe_xmit_cleanup(txq) != 0) {
761                                 /* Could not clean any descriptors */
762                                 if (nb_tx == 0)
763                                         return 0;
764                                 goto end_of_tx;
765                         }
766
767                         /* nb_used better be <= txq->tx_rs_thresh */
768                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
769                                 PMD_TX_LOG(DEBUG,
770                                            "The number of descriptors needed to "
771                                            "transmit the packet exceeds the "
772                                            "RS bit threshold. This will impact "
773                                            "performance. "
774                                            "nb_used=%4u nb_free=%4u "
775                                            "tx_rs_thresh=%4u. "
776                                            "(port=%d queue=%d)",
777                                            nb_used, txq->nb_tx_free,
778                                            txq->tx_rs_thresh,
779                                            txq->port_id, txq->queue_id);
780                                 /*
781                                  * Loop here until there are enough TX
782                                  * descriptors or until the ring cannot be
783                                  * cleaned.
784                                  */
785                                 while (nb_used > txq->nb_tx_free) {
786                                         if (ixgbe_xmit_cleanup(txq) != 0) {
787                                                 /*
788                                                  * Could not clean any
789                                                  * descriptors
790                                                  */
791                                                 if (nb_tx == 0)
792                                                         return 0;
793                                                 goto end_of_tx;
794                                         }
795                                 }
796                         }
797                 }
798
799                 /*
800                  * By now there are enough free TX descriptors to transmit
801                  * the packet.
802                  */
803
804                 /*
805                  * Set common flags of all TX Data Descriptors.
806                  *
807                  * The following bits must be set in all Data Descriptors:
808                  *   - IXGBE_ADVTXD_DTYP_DATA
809                  *   - IXGBE_ADVTXD_DCMD_DEXT
810                  *
811                  * The following bits must be set in the first Data Descriptor
812                  * and are ignored in the other ones:
813                  *   - IXGBE_ADVTXD_DCMD_IFCS
814                  *   - IXGBE_ADVTXD_MAC_1588
815                  *   - IXGBE_ADVTXD_DCMD_VLE
816                  *
817                  * The following bits must only be set in the last Data
818                  * Descriptor:
819                  *   - IXGBE_TXD_CMD_EOP
820                  *
821                  * The following bits can be set in any Data Descriptor, but
822                  * are only set in the last Data Descriptor:
823                  *   - IXGBE_TXD_CMD_RS
824                  */
825                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
827
828 #ifdef RTE_LIBRTE_IEEE1588
829                 if (ol_flags & PKT_TX_IEEE1588_TMST)
830                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
831 #endif
832
833                 olinfo_status = 0;
834                 if (tx_ol_req) {
835
836                         if (ol_flags & PKT_TX_TCP_SEG) {
837                                 /* when TSO is on, the paylen in the descriptor
838                                  * is not the packet len but the TCP payload len */
839                                 pkt_len -= (tx_offload.l2_len +
840                                         tx_offload.l3_len + tx_offload.l4_len);
841                         }
842
843                         /*
844                          * Setup the TX Advanced Context Descriptor if required
845                          */
846                         if (new_ctx) {
847                                 volatile struct ixgbe_adv_tx_context_desc *
848                                     ctx_txd;
849
850                                 ctx_txd = (volatile struct
851                                     ixgbe_adv_tx_context_desc *)
852                                     &txr[tx_id];
853
854                                 txn = &sw_ring[txe->next_id];
855                                 rte_prefetch0(&txn->mbuf->pool);
856
857                                 if (txe->mbuf != NULL) {
858                                         rte_pktmbuf_free_seg(txe->mbuf);
859                                         txe->mbuf = NULL;
860                                 }
861
862                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
863                                         tx_offload,
864                                         rte_security_dynfield(tx_pkt));
865
866                                 txe->last_id = tx_last;
867                                 tx_id = txe->next_id;
868                                 txe = txn;
869                         }
870
871                         /*
872                          * Set up the TX Advanced Data Descriptor.
873                          * This path is taken whether a new context
874                          * descriptor was built or an existing one is reused.
875                          */
876                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
877                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
878                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
879                 }
880
881                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
882 #ifdef RTE_LIB_SECURITY
883                 if (use_ipsec)
884                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
885 #endif
886
887                 m_seg = tx_pkt;
888                 do {
889                         txd = &txr[tx_id];
890                         txn = &sw_ring[txe->next_id];
891                         rte_prefetch0(&txn->mbuf->pool);
892
893                         if (txe->mbuf != NULL)
894                                 rte_pktmbuf_free_seg(txe->mbuf);
895                         txe->mbuf = m_seg;
896
897                         /*
898                          * Set up Transmit Data Descriptor.
899                          */
900                         slen = m_seg->data_len;
901                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
902                         txd->read.buffer_addr =
903                                 rte_cpu_to_le_64(buf_dma_addr);
904                         txd->read.cmd_type_len =
905                                 rte_cpu_to_le_32(cmd_type_len | slen);
906                         txd->read.olinfo_status =
907                                 rte_cpu_to_le_32(olinfo_status);
908                         txe->last_id = tx_last;
909                         tx_id = txe->next_id;
910                         txe = txn;
911                         m_seg = m_seg->next;
912                 } while (m_seg != NULL);
913
914                 /*
915                  * The last packet data descriptor needs End Of Packet (EOP)
916                  */
917                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
918                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
919                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
920
921                 /* Set RS bit only on threshold packets' last descriptor */
922                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
923                         PMD_TX_LOG(DEBUG,
924                                    "Setting RS bit on TXD id="
925                                    "%4u (port=%d queue=%d)",
926                                    tx_last, txq->port_id, txq->queue_id);
927
928                         cmd_type_len |= IXGBE_TXD_CMD_RS;
929
930                         /* Update txq RS bit counters */
931                         txq->nb_tx_used = 0;
932                         txp = NULL;
933                 } else
934                         txp = txd;
935
936                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
937         }
938
939 end_of_tx:
940         /* set RS on last packet in the burst */
941         if (txp != NULL)
942                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
943
944         rte_wmb();
945
946         /*
947          * Set the Transmit Descriptor Tail (TDT)
948          */
949         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
950                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
951                    (unsigned) tx_id, (unsigned) nb_tx);
952         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
953         txq->tx_tail = tx_id;
954
955         return nb_tx;
956 }
957
958 /*********************************************************************
959  *
960  *  TX prep functions
961  *
962  **********************************************************************/
963 uint16_t
964 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
965 {
966         int i, ret;
967         uint64_t ol_flags;
968         struct rte_mbuf *m;
969         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
970
971         for (i = 0; i < nb_pkts; i++) {
972                 m = tx_pkts[i];
973                 ol_flags = m->ol_flags;
974
975                 /**
976                  * Check if packet meets requirements for number of segments
977                  *
978                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
979                  *       non-TSO
980                  */
981
982                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
983                         rte_errno = EINVAL;
984                         return i;
985                 }
986
987                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
988                         rte_errno = ENOTSUP;
989                         return i;
990                 }
991
992                 /* check the size of packet */
993                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
994                         rte_errno = EINVAL;
995                         return i;
996                 }
997
998 #ifdef RTE_ETHDEV_DEBUG_TX
999                 ret = rte_validate_tx_offload(m);
1000                 if (ret != 0) {
1001                         rte_errno = -ret;
1002                         return i;
1003                 }
1004 #endif
1005                 ret = rte_net_intel_cksum_prepare(m);
1006                 if (ret != 0) {
1007                         rte_errno = -ret;
1008                         return i;
1009                 }
1010         }
1011
1012         return i;
1013 }
1014
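/*
 * Illustrative application-side sketch (not part of the driver): this prep
 * callback is reached through rte_eth_tx_prepare(), which applications
 * typically pair with rte_eth_tx_burst(). The helper name and the port/queue
 * identifiers below are placeholders; the application is assumed to include
 * <rte_ethdev.h>.
 */
#if 0
static uint16_t
example_send_burst(uint16_t port_id, uint16_t queue_id,
                   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        /* Validate offload requests; returns the count of acceptable mbufs. */
        uint16_t nb_ok = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);

        /* Hand the validated prefix to the PMD transmit path. */
        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_ok);
}
#endif
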
1015 /*********************************************************************
1016  *
1017  *  RX functions
1018  *
1019  **********************************************************************/
1020
1021 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1022 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1023 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1024 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1025 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1030 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1031 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1032 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1033 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1037 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1054
1055 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1078
1079 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1102
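/*
 * The index values above encode the parsed headers bit-wise: 0x01 IPv4,
 * 0x03 IPv4 with extension headers, 0x04 IPv6, 0x0C IPv6 with extension
 * headers, 0x10 TCP, 0x20 UDP and 0x40 SCTP; in the tunnel table the VXLAN
 * variants additionally set 0x80 while NVGRE variants keep it clear. For
 * example, IXGBE_PACKET_TYPE_IPV4_EXT_UDP = 0x23 = 0x20 | 0x03.
 */
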
1103 /**
1104  * Use two different tables for normal packets and tunnel packets
1105  * to save space.
1106  */
1107 const uint32_t
1108         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1109         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1110         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4,
1112         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1114         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1116         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1124         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1126         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6,
1128         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1130         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1132         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1140         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1151         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1163         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1175         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1187         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1188                 RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1191 };
1192
1193 const uint32_t
1194         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1195         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1216         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1217                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1219                 RTE_PTYPE_INNER_L4_TCP,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1223                 RTE_PTYPE_INNER_L4_TCP,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1230                 RTE_PTYPE_INNER_L4_TCP,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1232                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1233                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1234                 RTE_PTYPE_INNER_L3_IPV4,
1235         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1236                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1238                 RTE_PTYPE_INNER_L4_UDP,
1239         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1242                 RTE_PTYPE_INNER_L4_UDP,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1246                 RTE_PTYPE_INNER_L4_SCTP,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1253                 RTE_PTYPE_INNER_L4_UDP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1255                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1257                 RTE_PTYPE_INNER_L4_SCTP,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1259                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1260                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1261                 RTE_PTYPE_INNER_L3_IPV4,
1262         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1263                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1265                 RTE_PTYPE_INNER_L4_SCTP,
1266         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                 RTE_PTYPE_INNER_L4_SCTP,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1273                 RTE_PTYPE_INNER_L4_TCP,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1277                 RTE_PTYPE_INNER_L4_UDP,
1278
1279         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV6,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV4,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV4,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1323                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1324                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1325                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1351                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1352                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1353                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1370 };
1371
1372 int
1373 ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1374 {
1375         volatile union ixgbe_adv_rx_desc *rxdp;
1376         struct ixgbe_rx_queue *rxq = rx_queue;
1377         uint16_t desc;
1378
1379         desc = rxq->rx_tail;
1380         rxdp = &rxq->rx_ring[desc];
1381         /* watch for changes in status bit */
1382         pmc->addr = &rxdp->wb.upper.status_error;
1383
1384         /*
1385          * we expect the DD bit to be set to 1 if this descriptor was already
1386          * written to.
1387          */
1388         pmc->val = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1389         pmc->mask = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1390
1391         /* the monitored status field is a 32-bit value */
1392         pmc->size = sizeof(uint32_t);
1393
1394         return 0;
1395 }
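
/*
 * Usage sketch (illustrative only): the condition filled in above is normally
 * consumed through the generic ethdev and power-intrinsics APIs rather than
 * by calling this function directly.  Assuming a started port and queue,
 * with port_id, queue_id and timeout_cycles supplied by the application:
 *
 *     struct rte_power_monitor_cond pmc;
 *
 *     if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
 *         // sleep until the DD status of the next descriptor changes,
 *         // or until the TSC deadline is reached
 *         rte_power_monitor(&pmc, rte_get_tsc_cycles() + timeout_cycles);
 */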
1396
1397 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1398 static inline uint32_t
1399 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1400 {
1401
1402         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1403                 return RTE_PTYPE_UNKNOWN;
1404
1405         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1406
1407         /* For tunnel packet */
1408         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1409                 /* Remove the tunnel bit to save the space. */
1410                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1411                 return ptype_table_tn[pkt_info];
1412         }
1413
1414         /**
1415          * For x550, if the packet is not tunneled,
1416          * the tunnel type bits should already be 0,
1417          * so the 82599 mask can be reused.
1418          */
1419         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1420
1421         return ptype_table[pkt_info];
1422 }
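
/*
 * Illustrative summary of the lookup above:
 *   1. ETQF-matched packets are reported as RTE_PTYPE_UNKNOWN.
 *   2. pkt_info is shifted by IXGBE_PACKET_TYPE_SHIFT and masked down to a
 *      table index.
 *   3. If IXGBE_PACKET_TYPE_TUNNEL_BIT is set, the index selects an
 *      NVGRE/VXLAN entry from ptype_table_tn (defined above); otherwise the
 *      82599-style ptype_table is used.
 */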
1423
1424 static inline uint64_t
1425 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1426 {
1427         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1428                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1429                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1430                 PKT_RX_RSS_HASH, 0, 0, 0,
1431                 0, 0, 0,  PKT_RX_FDIR,
1432         };
1433 #ifdef RTE_LIBRTE_IEEE1588
1434         static uint64_t ip_pkt_etqf_map[8] = {
1435                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1436                 0, 0, 0, 0,
1437         };
1438
1439         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1440                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1441                                 ip_rss_types_map[pkt_info & 0XF];
1442         else
1443                 return ip_rss_types_map[pkt_info & 0XF];
1444 #else
1445         return ip_rss_types_map[pkt_info & 0XF];
1446 #endif
1447 }
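
/*
 * Illustrative example of the mapping above: the low nibble of pkt_info is
 * the RSS type reported by the NIC and indexes ip_rss_types_map, e.g.
 *
 *     (pkt_info & 0xF) == 0x0  ->  0               (no hash computed)
 *     (pkt_info & 0xF) == 0x1  ->  PKT_RX_RSS_HASH (hash.rss is valid)
 *     (pkt_info & 0xF) == 0xF  ->  PKT_RX_FDIR     (flow director match)
 *
 * With RTE_LIBRTE_IEEE1588 enabled, bits 4-6 additionally index
 * ip_pkt_etqf_map so PTP packets get PKT_RX_IEEE1588_PTP.
 */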
1448
1449 static inline uint64_t
1450 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1451 {
1452         uint64_t pkt_flags;
1453
1454         /*
1455          * Check only whether a VLAN tag is present.
1456          * Whether the L3/L4 Rx checksums were verified by the NIC is not
1457          * checked here; that can be found from the rte_eth_rxmode.offloads flags.
1458          */
1459         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1460
1461 #ifdef RTE_LIBRTE_IEEE1588
1462         if (rx_status & IXGBE_RXD_STAT_TMST)
1463                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1464 #endif
1465         return pkt_flags;
1466 }
1467
1468 static inline uint64_t
1469 rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1470                            uint8_t rx_udp_csum_zero_err)
1471 {
1472         uint64_t pkt_flags;
1473
1474         /*
1475          * Bit 31: IPE, IPv4 checksum error
1476          * Bit 30: L4I, L4 integrity error
1477          */
1478         static uint64_t error_to_pkt_flags_map[4] = {
1479                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1480                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1481                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1482                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1483         };
1484         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1485                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1486
1487         /* Mask out the bad-UDP-checksum error on hardware that has the
1488          * UDP zero-checksum error issue, so that the application can
1489          * recompute the checksum itself if needed.
1490          */
1491         if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1492             (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1493             rx_udp_csum_zero_err)
1494                 pkt_flags &= ~PKT_RX_L4_CKSUM_BAD;
1495
1496         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1497             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1498                 pkt_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
1499         }
1500
1501 #ifdef RTE_LIB_SECURITY
1502         if (rx_status & IXGBE_RXD_STAT_SECP) {
1503                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1504                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1505                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1506         }
1507 #endif
1508
1509         return pkt_flags;
1510 }
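
/*
 * Worked example for the checksum map above: after the shift by
 * IXGBE_RXDADV_ERR_CKSUM_BIT, bit 0 of the resulting index corresponds to
 * the L4 integrity error and bit 1 to the IPv4 checksum error, so
 *
 *     index 0b00 -> PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD
 *     index 0b01 -> PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD
 *     index 0b10 -> PKT_RX_IP_CKSUM_BAD  | PKT_RX_L4_CKSUM_GOOD
 *     index 0b11 -> PKT_RX_IP_CKSUM_BAD  | PKT_RX_L4_CKSUM_BAD
 *
 * The UDP zero-checksum workaround then clears PKT_RX_L4_CKSUM_BAD for the
 * affected UDP packets so the application may recompute the checksum itself.
 */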
1511
1512 /*
1513  * LOOK_AHEAD defines how many desc statuses to check beyond the
1514  * current descriptor.
1515  * It must be a pound define for optimal performance.
1516  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1517  * function only works with LOOK_AHEAD=8.
1518  */
1519 #define LOOK_AHEAD 8
1520 #if (LOOK_AHEAD != 8)
1521 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1522 #endif
1523 static inline int
1524 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1525 {
1526         volatile union ixgbe_adv_rx_desc *rxdp;
1527         struct ixgbe_rx_entry *rxep;
1528         struct rte_mbuf *mb;
1529         uint16_t pkt_len;
1530         uint64_t pkt_flags;
1531         int nb_dd;
1532         uint32_t s[LOOK_AHEAD];
1533         uint32_t pkt_info[LOOK_AHEAD];
1534         int i, j, nb_rx = 0;
1535         uint32_t status;
1536         uint64_t vlan_flags = rxq->vlan_flags;
1537
1538         /* get references to current descriptor and S/W ring entry */
1539         rxdp = &rxq->rx_ring[rxq->rx_tail];
1540         rxep = &rxq->sw_ring[rxq->rx_tail];
1541
1542         status = rxdp->wb.upper.status_error;
1543         /* check to make sure there is at least 1 packet to receive */
1544         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1545                 return 0;
1546
1547         /*
1548          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1549          * reference packets that are ready to be received.
1550          */
1551         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1552              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1553                 /* Read desc statuses backwards to avoid race condition */
1554                 for (j = 0; j < LOOK_AHEAD; j++)
1555                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1556
1557                 rte_smp_rmb();
1558
1559                 /* Count how many contiguous descriptors have their DD bit set */
1560                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1561                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1562                         ;
1563
1564                 for (j = 0; j < nb_dd; j++)
1565                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1566                                                        lo_dword.data);
1567
1568                 nb_rx += nb_dd;
1569
1570                 /* Translate descriptor info to mbuf format */
1571                 for (j = 0; j < nb_dd; ++j) {
1572                         mb = rxep[j].mbuf;
1573                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1574                                   rxq->crc_len;
1575                         mb->data_len = pkt_len;
1576                         mb->pkt_len = pkt_len;
1577                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1578
1579                         /* convert descriptor fields to rte mbuf flags */
1580                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1581                                 vlan_flags);
1582                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
1583                                         (uint16_t)pkt_info[j],
1584                                         rxq->rx_udp_csum_zero_err);
1585                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1586                                         ((uint16_t)pkt_info[j]);
1587                         mb->ol_flags = pkt_flags;
1588                         mb->packet_type =
1589                                 ixgbe_rxd_pkt_info_to_pkt_type
1590                                         (pkt_info[j], rxq->pkt_type_mask);
1591
1592                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1593                                 mb->hash.rss = rte_le_to_cpu_32(
1594                                     rxdp[j].wb.lower.hi_dword.rss);
1595                         else if (pkt_flags & PKT_RX_FDIR) {
1596                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1597                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1598                                     IXGBE_ATR_HASH_MASK;
1599                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1600                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1601                         }
1602                 }
1603
1604                 /* Move mbuf pointers from the S/W ring to the stage */
1605                 for (j = 0; j < LOOK_AHEAD; ++j) {
1606                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1607                 }
1608
1609                 /* stop scanning if fewer than LOOK_AHEAD descriptors were done */
1610                 if (nb_dd != LOOK_AHEAD)
1611                         break;
1612         }
1613
1614         /* clear software ring entries so we can cleanup correctly */
1615         for (i = 0; i < nb_rx; ++i) {
1616                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1617         }
1618
1619
1620         return nb_rx;
1621 }
1622
1623 static inline int
1624 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1625 {
1626         volatile union ixgbe_adv_rx_desc *rxdp;
1627         struct ixgbe_rx_entry *rxep;
1628         struct rte_mbuf *mb;
1629         uint16_t alloc_idx;
1630         __le64 dma_addr;
1631         int diag, i;
1632
1633         /* allocate buffers in bulk directly into the S/W ring */
1634         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1635         rxep = &rxq->sw_ring[alloc_idx];
1636         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1637                                     rxq->rx_free_thresh);
1638         if (unlikely(diag != 0))
1639                 return -ENOMEM;
1640
1641         rxdp = &rxq->rx_ring[alloc_idx];
1642         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1643                 /* populate the static rte mbuf fields */
1644                 mb = rxep[i].mbuf;
1645                 if (reset_mbuf) {
1646                         mb->port = rxq->port_id;
1647                 }
1648
1649                 rte_mbuf_refcnt_set(mb, 1);
1650                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1651
1652                 /* populate the descriptors */
1653                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1654                 rxdp[i].read.hdr_addr = 0;
1655                 rxdp[i].read.pkt_addr = dma_addr;
1656         }
1657
1658         /* update state of internal queue structure */
1659         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1660         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1661                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1662
1663         /* no errors */
1664         return 0;
1665 }
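
/*
 * Illustrative example (assuming rx_free_trigger starts at rx_free_thresh - 1
 * when the queue is reset, which is not shown here): with nb_rx_desc = 512
 * and rx_free_thresh = 32 the refill point advances 31 -> 63 -> ... -> 511
 * and then wraps back to 31, so each successful call replenishes exactly one
 * rx_free_thresh-sized block of descriptors.
 */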
1666
1667 static inline uint16_t
1668 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1669                          uint16_t nb_pkts)
1670 {
1671         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1672         int i;
1673
1674         /* how many packets are ready to return? */
1675         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1676
1677         /* copy mbuf pointers to the application's packet list */
1678         for (i = 0; i < nb_pkts; ++i)
1679                 rx_pkts[i] = stage[i];
1680
1681         /* update internal queue state */
1682         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1683         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1684
1685         return nb_pkts;
1686 }
1687
1688 static inline uint16_t
1689 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1690              uint16_t nb_pkts)
1691 {
1692         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1693         uint16_t nb_rx = 0;
1694
1695         /* Any previously recv'd pkts will be returned from the Rx stage */
1696         if (rxq->rx_nb_avail)
1697                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1698
1699         /* Scan the H/W ring for packets to receive */
1700         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1701
1702         /* update internal queue state */
1703         rxq->rx_next_avail = 0;
1704         rxq->rx_nb_avail = nb_rx;
1705         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1706
1707         /* if required, allocate new buffers to replenish descriptors */
1708         if (rxq->rx_tail > rxq->rx_free_trigger) {
1709                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1710
1711                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1712                         int i, j;
1713
1714                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1715                                    "queue_id=%u", (unsigned) rxq->port_id,
1716                                    (unsigned) rxq->queue_id);
1717
1718                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1719                                 rxq->rx_free_thresh;
1720
1721                         /*
1722                          * Need to rewind any previous receives if we cannot
1723                          * allocate new buffers to replenish the old ones.
1724                          */
1725                         rxq->rx_nb_avail = 0;
1726                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1727                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1728                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1729
1730                         return 0;
1731                 }
1732
1733                 /* update tail pointer */
1734                 rte_wmb();
1735                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1736                                             cur_free_trigger);
1737         }
1738
1739         if (rxq->rx_tail >= rxq->nb_rx_desc)
1740                 rxq->rx_tail = 0;
1741
1742         /* received any packets this loop? */
1743         if (rxq->rx_nb_avail)
1744                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1745
1746         return 0;
1747 }
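
/*
 * Summary sketch of the bulk-alloc receive path implemented by the helpers
 * above:
 *
 *     1. ixgbe_rx_scan_hw_ring()    - scan up to RTE_PMD_IXGBE_RX_MAX_BURST
 *                                     done descriptors, LOOK_AHEAD at a time,
 *                                     and park their mbufs in rx_stage[].
 *     2. ixgbe_rx_alloc_bufs()      - refill the scanned slots in bulk once
 *                                     rx_tail passes rx_free_trigger.
 *     3. ixgbe_rx_fill_from_stage() - hand staged mbufs to the caller,
 *                                     possibly spread over several calls.
 *
 * On a refill failure rx_tail is rewound and the staged mbufs are put back
 * into sw_ring, so the already-scanned packets are simply returned by a
 * later call.
 */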
1748
1749 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1750 uint16_t
1751 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1752                            uint16_t nb_pkts)
1753 {
1754         uint16_t nb_rx;
1755
1756         if (unlikely(nb_pkts == 0))
1757                 return 0;
1758
1759         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1760                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1761
1762         /* request is relatively large, chunk it up */
1763         nb_rx = 0;
1764         while (nb_pkts) {
1765                 uint16_t ret, n;
1766
1767                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1768                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1769                 nb_rx = (uint16_t)(nb_rx + ret);
1770                 nb_pkts = (uint16_t)(nb_pkts - ret);
1771                 if (ret < n)
1772                         break;
1773         }
1774
1775         return nb_rx;
1776 }
1777
1778 uint16_t
1779 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1780                 uint16_t nb_pkts)
1781 {
1782         struct ixgbe_rx_queue *rxq;
1783         volatile union ixgbe_adv_rx_desc *rx_ring;
1784         volatile union ixgbe_adv_rx_desc *rxdp;
1785         struct ixgbe_rx_entry *sw_ring;
1786         struct ixgbe_rx_entry *rxe;
1787         struct rte_mbuf *rxm;
1788         struct rte_mbuf *nmb;
1789         union ixgbe_adv_rx_desc rxd;
1790         uint64_t dma_addr;
1791         uint32_t staterr;
1792         uint32_t pkt_info;
1793         uint16_t pkt_len;
1794         uint16_t rx_id;
1795         uint16_t nb_rx;
1796         uint16_t nb_hold;
1797         uint64_t pkt_flags;
1798         uint64_t vlan_flags;
1799
1800         nb_rx = 0;
1801         nb_hold = 0;
1802         rxq = rx_queue;
1803         rx_id = rxq->rx_tail;
1804         rx_ring = rxq->rx_ring;
1805         sw_ring = rxq->sw_ring;
1806         vlan_flags = rxq->vlan_flags;
1807         while (nb_rx < nb_pkts) {
1808                 /*
1809                  * The order of operations here is important as the DD status
1810                  * bit must not be read after any other descriptor fields.
1811                  * rx_ring and rxdp point to volatile data, so the accesses
1812                  * cannot be reordered by the compiler. If they were not
1813                  * volatile, the accesses could be reordered, which could lead
1814                  * to using invalid descriptor fields when rxd is read.
1815                  */
1816                 rxdp = &rx_ring[rx_id];
1817                 staterr = rxdp->wb.upper.status_error;
1818                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1819                         break;
1820                 rxd = *rxdp;
1821
1822                 /*
1823                  * End of packet.
1824                  *
1825                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1826                  * is likely to be invalid and to be dropped by the various
1827                  * validation checks performed by the network stack.
1828                  *
1829                  * Allocate a new mbuf to replenish the RX ring descriptor.
1830                  * If the allocation fails:
1831                  *    - arrange for that RX descriptor to be the first one
1832                  *      being parsed the next time the receive function is
1833                  *      invoked [on the same queue].
1834                  *
1835                  *    - Stop parsing the RX ring and return immediately.
1836                  *
1837                  * This policy does not drop the packet received in the RX
1838                  * descriptor for which the allocation of a new mbuf failed.
1839                  * Thus, it allows that packet to be retrieved later, once
1840                  * mbufs have been freed in the meantime.
1841                  * As a side effect, holding RX descriptors instead of
1842                  * systematically giving them back to the NIC may lead to
1843                  * RX ring exhaustion situations.
1844                  * However, the NIC can gracefully prevent such situations
1845                  * from happening by sending specific "back-pressure" flow
1846                  * control frames to its peer(s).
1847                  */
1848                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1849                            "ext_err_stat=0x%08x pkt_len=%u",
1850                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1851                            (unsigned) rx_id, (unsigned) staterr,
1852                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1853
1854                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1855                 if (nmb == NULL) {
1856                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1857                                    "queue_id=%u", (unsigned) rxq->port_id,
1858                                    (unsigned) rxq->queue_id);
1859                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1860                         break;
1861                 }
1862
1863                 nb_hold++;
1864                 rxe = &sw_ring[rx_id];
1865                 rx_id++;
1866                 if (rx_id == rxq->nb_rx_desc)
1867                         rx_id = 0;
1868
1869                 /* Prefetch next mbuf while processing current one. */
1870                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1871
1872                 /*
1873                  * When next RX descriptor is on a cache-line boundary,
1874                  * prefetch the next 4 RX descriptors and the next 8 pointers
1875                  * to mbufs.
1876                  */
1877                 if ((rx_id & 0x3) == 0) {
1878                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1879                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1880                 }
1881
1882                 rxm = rxe->mbuf;
1883                 rxe->mbuf = nmb;
1884                 dma_addr =
1885                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1886                 rxdp->read.hdr_addr = 0;
1887                 rxdp->read.pkt_addr = dma_addr;
1888
1889                 /*
1890                  * Initialize the returned mbuf.
1891                  * 1) setup generic mbuf fields:
1892                  *    - number of segments,
1893                  *    - next segment,
1894                  *    - packet length,
1895                  *    - RX port identifier.
1896                  * 2) integrate hardware offload data, if any:
1897                  *    - RSS flag & hash,
1898                  *    - IP checksum flag,
1899                  *    - VLAN TCI, if any,
1900                  *    - error flags.
1901                  */
1902                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1903                                       rxq->crc_len);
1904                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1905                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1906                 rxm->nb_segs = 1;
1907                 rxm->next = NULL;
1908                 rxm->pkt_len = pkt_len;
1909                 rxm->data_len = pkt_len;
1910                 rxm->port = rxq->port_id;
1911
1912                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1913                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1914                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1915
1916                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1917                 pkt_flags = pkt_flags |
1918                         rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
1919                                                    rxq->rx_udp_csum_zero_err);
1920                 pkt_flags = pkt_flags |
1921                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1922                 rxm->ol_flags = pkt_flags;
1923                 rxm->packet_type =
1924                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1925                                                        rxq->pkt_type_mask);
1926
1927                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1928                         rxm->hash.rss = rte_le_to_cpu_32(
1929                                                 rxd.wb.lower.hi_dword.rss);
1930                 else if (pkt_flags & PKT_RX_FDIR) {
1931                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1932                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1933                                         IXGBE_ATR_HASH_MASK;
1934                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1935                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1936                 }
1937                 /*
1938                  * Store the mbuf address into the next entry of the array
1939                  * of returned packets.
1940                  */
1941                 rx_pkts[nb_rx++] = rxm;
1942         }
1943         rxq->rx_tail = rx_id;
1944
1945         /*
1946          * If the number of free RX descriptors is greater than the RX free
1947          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1948          * register.
1949          * Update the RDT with the value of the last processed RX descriptor
1950          * minus 1, to guarantee that the RDT register is never equal to the
1951          * RDH register, which would create a "full" ring situation from the
1952          * hardware point of view...
1953          */
1954         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1955         if (nb_hold > rxq->rx_free_thresh) {
1956                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1957                            "nb_hold=%u nb_rx=%u",
1958                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1959                            (unsigned) rx_id, (unsigned) nb_hold,
1960                            (unsigned) nb_rx);
1961                 rx_id = (uint16_t) ((rx_id == 0) ?
1962                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1963                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1964                 nb_hold = 0;
1965         }
1966         rxq->nb_rx_hold = nb_hold;
1967         return nb_rx;
1968 }
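
/*
 * Illustrative example of the tail update policy described above: with
 * rx_free_thresh = 32, descriptors are handed back to the hardware only once
 * more than 32 of them have been processed, and the tail is written as
 * "last processed index - 1".  Four consecutive calls receiving 10 packets
 * each therefore cause a single IXGBE_PCI_REG_WC_WRITE() instead of one per
 * call, at the cost of temporarily holding up to rx_free_thresh descriptors
 * back from the NIC.
 */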
1969
1970 /**
1971  * Detect an RSC descriptor.
1972  */
1973 static inline uint32_t
1974 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1975 {
1976         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1977                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1978 }
1979
1980 /**
1981  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1982  *
1983  * Fill the following info in the HEAD buffer of the Rx cluster:
1984  *    - RX port identifier
1985  *    - hardware offload data, if any:
1986  *      - RSS flag & hash
1987  *      - IP checksum flag
1988  *      - VLAN TCI, if any
1989  *      - error flags
1990  * @head HEAD of the packet cluster
1991  * @desc HW descriptor to get data from
1992  * @rxq Pointer to the Rx queue
1993  */
1994 static inline void
1995 ixgbe_fill_cluster_head_buf(
1996         struct rte_mbuf *head,
1997         union ixgbe_adv_rx_desc *desc,
1998         struct ixgbe_rx_queue *rxq,
1999         uint32_t staterr)
2000 {
2001         uint32_t pkt_info;
2002         uint64_t pkt_flags;
2003
2004         head->port = rxq->port_id;
2005
2006         /* The vlan_tci field is only valid when PKT_RX_VLAN is
2007          * set in the pkt_flags field.
2008          */
2009         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2010         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2011         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2012         pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2013                                                 rxq->rx_udp_csum_zero_err);
2014         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2015         head->ol_flags = pkt_flags;
2016         head->packet_type =
2017                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2018
2019         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2020                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2021         else if (pkt_flags & PKT_RX_FDIR) {
2022                 head->hash.fdir.hash =
2023                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2024                                                           & IXGBE_ATR_HASH_MASK;
2025                 head->hash.fdir.id =
2026                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2027         }
2028 }
2029
2030 /**
2031  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2032  *
2033  * @rx_queue Rx queue handle
2034  * @rx_pkts table of received packets
2035  * @nb_pkts size of rx_pkts table
2036  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2037  *
2038  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2039  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2040  *
2041  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2042  * 1) When non-EOP RSC completion arrives:
2043  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2044  *       segment's data length.
2045  *    b) Set the "next" pointer of the current segment to point to the segment
2046  *       at the NEXTP index.
2047  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2048  *       in the sw_rsc_ring.
2049  * 2) When EOP arrives we just update the cluster's total length and offload
2050  *    flags and deliver the cluster up to the upper layers. In our case - put it
2051  *    in the rx_pkts table.
2052  *
2053  * Returns the number of received packets/clusters (according to the "bulk
2054  * receive" interface).
2055  */
2056 static inline uint16_t
2057 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2058                     bool bulk_alloc)
2059 {
2060         struct ixgbe_rx_queue *rxq = rx_queue;
2061         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2062         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2063         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2064         uint16_t rx_id = rxq->rx_tail;
2065         uint16_t nb_rx = 0;
2066         uint16_t nb_hold = rxq->nb_rx_hold;
2067         uint16_t prev_id = rxq->rx_tail;
2068
2069         while (nb_rx < nb_pkts) {
2070                 bool eop;
2071                 struct ixgbe_rx_entry *rxe;
2072                 struct ixgbe_scattered_rx_entry *sc_entry;
2073                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2074                 struct ixgbe_rx_entry *next_rxe = NULL;
2075                 struct rte_mbuf *first_seg;
2076                 struct rte_mbuf *rxm;
2077                 struct rte_mbuf *nmb = NULL;
2078                 union ixgbe_adv_rx_desc rxd;
2079                 uint16_t data_len;
2080                 uint16_t next_id;
2081                 volatile union ixgbe_adv_rx_desc *rxdp;
2082                 uint32_t staterr;
2083
2084 next_desc:
2085                 /*
2086                  * The code in this whole file uses the volatile pointer to
2087                  * ensure the read ordering of the status and the rest of the
2088                  * descriptor fields (on the compiler level only!!!). This is so
2089                  * UGLY - why not to just use the compiler barrier instead? DPDK
2090                  * UGLY - why not just use a compiler barrier instead? DPDK
2091                  *
2092                  * But most importantly this is just wrong because this doesn't
2093                  * ensure memory ordering in a general case at all. For
2094                  * instance, DPDK is supposed to work on Power CPUs where
2095                  * compiler barrier may just not be enough!
2096                  *
2097                  * I tried to write only this function properly to have a
2098                  * starting point (as a part of an LRO/RSC series) but the
2099                  * compiler cursed at me when I tried to cast away the
2100                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2101                  * keeping it the way it is for now.
2102                  *
2103                  * The code in this file is broken in so many other places and
2104                  * will just not work on a big-endian CPU anyway; therefore the
2105                  * lines below will have to be revisited together with the rest
2106                  * of the ixgbe PMD.
2107                  *
2108                  * TODO:
2109                  *    - Get rid of "volatile" and let the compiler do its job.
2110                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2111                  *      memory ordering below.
2112                  */
2113                 rxdp = &rx_ring[rx_id];
2114                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2115
2116                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2117                         break;
2118
2119                 rxd = *rxdp;
2120
2121                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2122                                   "staterr=0x%x data_len=%u",
2123                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2124                            rte_le_to_cpu_16(rxd.wb.upper.length));
2125
2126                 if (!bulk_alloc) {
2127                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2128                         if (nmb == NULL) {
2129                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2130                                                   "port_id=%u queue_id=%u",
2131                                            rxq->port_id, rxq->queue_id);
2132
2133                                 rte_eth_devices[rxq->port_id].data->
2134                                                         rx_mbuf_alloc_failed++;
2135                                 break;
2136                         }
2137                 } else if (nb_hold > rxq->rx_free_thresh) {
2138                         uint16_t next_rdt = rxq->rx_free_trigger;
2139
2140                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2141                                 rte_wmb();
2142                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2143                                                         rxq->rdt_reg_addr,
2144                                                         next_rdt);
2145                                 nb_hold -= rxq->rx_free_thresh;
2146                         } else {
2147                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2148                                                   "port_id=%u queue_id=%u",
2149                                            rxq->port_id, rxq->queue_id);
2150
2151                                 rte_eth_devices[rxq->port_id].data->
2152                                                         rx_mbuf_alloc_failed++;
2153                                 break;
2154                         }
2155                 }
2156
2157                 nb_hold++;
2158                 rxe = &sw_ring[rx_id];
2159                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2160
2161                 next_id = rx_id + 1;
2162                 if (next_id == rxq->nb_rx_desc)
2163                         next_id = 0;
2164
2165                 /* Prefetch next mbuf while processing current one. */
2166                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2167
2168                 /*
2169                  * When next RX descriptor is on a cache-line boundary,
2170                  * prefetch the next 4 RX descriptors and the next 8 pointers
2171                  * to mbufs.
2172                  */
2173                 if ((next_id & 0x3) == 0) {
2174                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2175                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2176                 }
2177
2178                 rxm = rxe->mbuf;
2179
2180                 if (!bulk_alloc) {
2181                         __le64 dma =
2182                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2183                         /*
2184                          * Update RX descriptor with the physical address of the
2185                          * new data buffer of the newly allocated mbuf.
2186                          */
2187                         rxe->mbuf = nmb;
2188
2189                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2190                         rxdp->read.hdr_addr = 0;
2191                         rxdp->read.pkt_addr = dma;
2192                 } else
2193                         rxe->mbuf = NULL;
2194
2195                 /*
2196                  * Set data length & data buffer address of mbuf.
2197                  */
2198                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2199                 rxm->data_len = data_len;
2200
2201                 if (!eop) {
2202                         uint16_t nextp_id;
2203                         /*
2204                          * Get next descriptor index:
2205                          *  - For RSC it's in the NEXTP field.
2206                          *  - For a scattered packet - it's just a following
2207                          *    descriptor.
2208                          */
2209                         if (ixgbe_rsc_count(&rxd))
2210                                 nextp_id =
2211                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2212                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2213                         else
2214                                 nextp_id = next_id;
2215
2216                         next_sc_entry = &sw_sc_ring[nextp_id];
2217                         next_rxe = &sw_ring[nextp_id];
2218                         rte_ixgbe_prefetch(next_rxe);
2219                 }
2220
2221                 sc_entry = &sw_sc_ring[rx_id];
2222                 first_seg = sc_entry->fbuf;
2223                 sc_entry->fbuf = NULL;
2224
2225                 /*
2226                  * If this is the first buffer of the received packet,
2227                  * set the pointer to the first mbuf of the packet and
2228                  * initialize its context.
2229                  * Otherwise, update the total length and the number of segments
2230                  * of the current scattered packet, and update the pointer to
2231                  * the last mbuf of the current packet.
2232                  */
2233                 if (first_seg == NULL) {
2234                         first_seg = rxm;
2235                         first_seg->pkt_len = data_len;
2236                         first_seg->nb_segs = 1;
2237                 } else {
2238                         first_seg->pkt_len += data_len;
2239                         first_seg->nb_segs++;
2240                 }
2241
2242                 prev_id = rx_id;
2243                 rx_id = next_id;
2244
2245                 /*
2246                  * If this is not the last buffer of the received packet, update
2247                  * the pointer to the first mbuf at the NEXTP entry in the
2248                  * sw_sc_ring and continue to parse the RX ring.
2249                  */
2250                 if (!eop && next_rxe) {
2251                         rxm->next = next_rxe->mbuf;
2252                         next_sc_entry->fbuf = first_seg;
2253                         goto next_desc;
2254                 }
2255
2256                 /* Initialize the first mbuf of the returned packet */
2257                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2258
2259                 /*
2260                  * Deal with the case when HW CRC strip is disabled.
2261                  * That can't happen when LRO is enabled, but still could
2262                  * happen for scattered RX mode.
2263                  */
2264                 first_seg->pkt_len -= rxq->crc_len;
2265                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2266                         struct rte_mbuf *lp;
2267
2268                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2269                                 ;
2270
2271                         first_seg->nb_segs--;
2272                         lp->data_len -= rxq->crc_len - rxm->data_len;
2273                         lp->next = NULL;
2274                         rte_pktmbuf_free_seg(rxm);
2275                 } else
2276                         rxm->data_len -= rxq->crc_len;
2277
2278                 /* Prefetch data of first segment, if configured to do so. */
2279                 rte_packet_prefetch((char *)first_seg->buf_addr +
2280                         first_seg->data_off);
2281
2282                 /*
2283                  * Store the mbuf address into the next entry of the array
2284                  * of returned packets.
2285                  */
2286                 rx_pkts[nb_rx++] = first_seg;
2287         }
2288
2289         /*
2290          * Record index of the next RX descriptor to probe.
2291          */
2292         rxq->rx_tail = rx_id;
2293
2294         /*
2295          * If the number of free RX descriptors is greater than the RX free
2296          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2297          * register.
2298          * Update the RDT with the value of the last processed RX descriptor
2299          * minus 1, to guarantee that the RDT register is never equal to the
2300          * RDH register, which would create a "full" ring situation from the
2301          * hardware point of view...
2302          */
2303         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2304                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2305                            "nb_hold=%u nb_rx=%u",
2306                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2307
2308                 rte_wmb();
2309                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2310                 nb_hold = 0;
2311         }
2312
2313         rxq->nb_rx_hold = nb_hold;
2314         return nb_rx;
2315 }
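
/*
 * Illustrative note on segment chaining in the RSC/scattered path above;
 * sw_sc_ring carries the cluster head forward from descriptor to descriptor:
 *
 *     first buffer : sc_entry->fbuf == NULL, so rxm becomes first_seg
 *     middle buffer: rxm->next = next_rxe->mbuf and
 *                    next_sc_entry->fbuf = first_seg (the head travels on)
 *     EOP buffer   : the head is finalised via ixgbe_fill_cluster_head_buf()
 *                    and stored in rx_pkts[]
 *
 * For plain scattered RX the next descriptor is simply rx_id + 1; for RSC
 * completions it is taken from the NEXTP field of the descriptor.
 */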
2316
2317 uint16_t
2318 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2319                                  uint16_t nb_pkts)
2320 {
2321         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2322 }
2323
2324 uint16_t
2325 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2326                                uint16_t nb_pkts)
2327 {
2328         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2329 }
2330
2331 /*********************************************************************
2332  *
2333  *  Queue management functions
2334  *
2335  **********************************************************************/
2336
2337 static void __rte_cold
2338 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2339 {
2340         unsigned i;
2341
2342         if (txq->sw_ring != NULL) {
2343                 for (i = 0; i < txq->nb_tx_desc; i++) {
2344                         if (txq->sw_ring[i].mbuf != NULL) {
2345                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2346                                 txq->sw_ring[i].mbuf = NULL;
2347                         }
2348                 }
2349         }
2350 }
2351
2352 static int
2353 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2354 {
2355         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2356         uint16_t i, tx_last, tx_id;
2357         uint16_t nb_tx_free_last;
2358         uint16_t nb_tx_to_clean;
2359         uint32_t pkt_cnt;
2360
2361         /* Start freeing mbufs from the entry following tx_tail */
2362         tx_last = txq->tx_tail;
2363         tx_id  = swr_ring[tx_last].next_id;
2364
2365         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2366                 return 0;
2367
2368         nb_tx_to_clean = txq->nb_tx_free;
2369         nb_tx_free_last = txq->nb_tx_free;
2370         if (!free_cnt)
2371                 free_cnt = txq->nb_tx_desc;
2372
2373         /* Loop through swr_ring to count the number of
2374          * freeable mbufs and packets.
2375          */
2376         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2377                 for (i = 0; i < nb_tx_to_clean &&
2378                         pkt_cnt < free_cnt &&
2379                         tx_id != tx_last; i++) {
2380                         if (swr_ring[tx_id].mbuf != NULL) {
2381                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2382                                 swr_ring[tx_id].mbuf = NULL;
2383
2384                                 /*
2385                                  * last segment in the packet,
2386                                  * increment packet count
2387                                  */
2388                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2389                         }
2390
2391                         tx_id = swr_ring[tx_id].next_id;
2392                 }
2393
2394                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2395                         txq->nb_tx_free || tx_id == tx_last)
2396                         break;
2397
2398                 if (pkt_cnt < free_cnt) {
2399                         if (ixgbe_xmit_cleanup(txq))
2400                                 break;
2401
2402                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2403                         nb_tx_free_last = txq->nb_tx_free;
2404                 }
2405         }
2406
2407         return (int)pkt_cnt;
2408 }
2409
2410 static int
2411 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2412                         uint32_t free_cnt)
2413 {
2414         int i, n, cnt;
2415
2416         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2417                 free_cnt = txq->nb_tx_desc;
2418
2419         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2420
2421         for (i = 0; i < cnt; i += n) {
2422                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2423                         break;
2424
2425                 n = ixgbe_tx_free_bufs(txq);
2426
2427                 if (n == 0)
2428                         break;
2429         }
2430
2431         return i;
2432 }
2433
2434 static int
2435 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2436                         uint32_t free_cnt __rte_unused)
2437 {
2438         return -ENOTSUP;
2439 }
2440
2441 int
2442 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2443 {
2444         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2445         if (txq->offloads == 0 &&
2446 #ifdef RTE_LIB_SECURITY
2447                         !(txq->using_ipsec) &&
2448 #endif
2449                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2450                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2451                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2452                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2453                                         txq->sw_ring_v != NULL)) {
2454                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2455                 } else {
2456                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2457                 }
2458         }
2459
2460         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2461 }
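
/*
 * Usage sketch (illustrative only): this callback is reached through the
 * generic ethdev API, with port_id and queue_id supplied by the application:
 *
 *     // free the mbufs of up to 64 already-transmitted packets
 *     int n = rte_eth_tx_done_cleanup(port_id, queue_id, 64);
 *
 * A negative return value is an error; as the code above shows, the vector
 * Tx path reports -ENOTSUP, while the simple and full paths return the
 * number of packets whose mbufs were freed.
 */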
2462
2463 static void __rte_cold
2464 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2465 {
2466         if (txq != NULL &&
2467             txq->sw_ring != NULL)
2468                 rte_free(txq->sw_ring);
2469 }
2470
2471 static void __rte_cold
2472 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2473 {
2474         if (txq != NULL && txq->ops != NULL) {
2475                 txq->ops->release_mbufs(txq);
2476                 txq->ops->free_swring(txq);
2477                 rte_free(txq);
2478         }
2479 }
2480
2481 void __rte_cold
2482 ixgbe_dev_tx_queue_release(void *txq)
2483 {
2484         ixgbe_tx_queue_release(txq);
2485 }
2486
2487 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2488 static void __rte_cold
2489 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2490 {
2491         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2492         struct ixgbe_tx_entry *txe = txq->sw_ring;
2493         uint16_t prev, i;
2494
2495         /* Zero out HW ring memory */
2496         for (i = 0; i < txq->nb_tx_desc; i++) {
2497                 txq->tx_ring[i] = zeroed_desc;
2498         }
2499
2500         /* Initialize SW ring entries */
2501         prev = (uint16_t) (txq->nb_tx_desc - 1);
2502         for (i = 0; i < txq->nb_tx_desc; i++) {
2503                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2504
2505                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2506                 txe[i].mbuf = NULL;
2507                 txe[i].last_id = i;
2508                 txe[prev].next_id = i;
2509                 prev = i;
2510         }
2511
2512         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2513         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2514
2515         txq->tx_tail = 0;
2516         txq->nb_tx_used = 0;
2517         /*
2518          * Always allow 1 descriptor to be un-allocated to avoid
2519          * a H/W race condition
2520          */
2521         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2522         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2523         txq->ctx_curr = 0;
2524         memset((void *)&txq->ctx_cache, 0,
2525                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2526 }
2527
2528 static const struct ixgbe_txq_ops def_txq_ops = {
2529         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2530         .free_swring = ixgbe_tx_free_swring,
2531         .reset = ixgbe_reset_tx_queue,
2532 };
2533
2534 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2535  * the queue parameters. Used in tx_queue_setup by primary process and then
2536  * in dev_init by secondary process when attaching to an existing ethdev.
2537  */
2538 void __rte_cold
2539 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2540 {
2541         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2542         if ((txq->offloads == 0) &&
2543 #ifdef RTE_LIB_SECURITY
2544                         !(txq->using_ipsec) &&
2545 #endif
2546                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2547                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2548                 dev->tx_pkt_prepare = NULL;
2549                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2550                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2551                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2552                                         ixgbe_txq_vec_setup(txq) == 0)) {
2553                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2554                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2555                 } else
2556                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2557         } else {
2558                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2559                 PMD_INIT_LOG(DEBUG,
2560                                 " - offloads = 0x%" PRIx64,
2561                                 txq->offloads);
2562                 PMD_INIT_LOG(DEBUG,
2563                                 " - tx_rs_thresh = %lu [RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2564                                 (unsigned long)txq->tx_rs_thresh,
2565                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2566                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2567                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2568         }
2569 }
2570
2571 uint64_t
2572 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2573 {
2574         RTE_SET_USED(dev);
2575
2576         return 0;
2577 }
2578
2579 uint64_t
2580 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2581 {
2582         uint64_t tx_offload_capa;
2583         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2584
2585         tx_offload_capa =
2586                 DEV_TX_OFFLOAD_VLAN_INSERT |
2587                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2588                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2589                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2590                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2591                 DEV_TX_OFFLOAD_TCP_TSO     |
2592                 DEV_TX_OFFLOAD_MULTI_SEGS;
2593
2594         if (hw->mac.type == ixgbe_mac_82599EB ||
2595             hw->mac.type == ixgbe_mac_X540)
2596                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2597
2598         if (hw->mac.type == ixgbe_mac_X550 ||
2599             hw->mac.type == ixgbe_mac_X550EM_x ||
2600             hw->mac.type == ixgbe_mac_X550EM_a)
2601                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2602
2603 #ifdef RTE_LIB_SECURITY
2604         if (dev->security_ctx)
2605                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2606 #endif
2607         return tx_offload_capa;
2608 }
2609
2610 int __rte_cold
2611 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2612                          uint16_t queue_idx,
2613                          uint16_t nb_desc,
2614                          unsigned int socket_id,
2615                          const struct rte_eth_txconf *tx_conf)
2616 {
2617         const struct rte_memzone *tz;
2618         struct ixgbe_tx_queue *txq;
2619         struct ixgbe_hw     *hw;
2620         uint16_t tx_rs_thresh, tx_free_thresh;
2621         uint64_t offloads;
2622
2623         PMD_INIT_FUNC_TRACE();
2624         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2625
2626         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2627
2628         /*
2629          * Validate number of transmit descriptors.
2630          * It must not exceed the hardware maximum and must be a multiple
2631          * of IXGBE_TXD_ALIGN.
2632          */
2633         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2634                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2635                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2636                 return -EINVAL;
2637         }
2638
2639         /*
2640          * The following two parameters control the setting of the RS bit on
2641          * transmit descriptors.
2642          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2643          * descriptors have been used.
2644          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2645          * descriptors are used or if the number of descriptors required
2646          * to transmit a packet is greater than the number of free TX
2647          * descriptors.
2648          * The following constraints must be satisfied:
2649          *  tx_rs_thresh must be greater than 0.
2650          *  tx_rs_thresh must be less than the size of the ring minus 2.
2651          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2652          *  tx_rs_thresh must be a divisor of the ring size.
2653          *  tx_free_thresh must be greater than 0.
2654          *  tx_free_thresh must be less than the size of the ring minus 3.
2655          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2656          * One descriptor in the TX ring is used as a sentinel to avoid a
2657          * H/W race condition, hence the maximum threshold constraints.
2658          * When set to zero use default values.
2659          */
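        /*
         * Illustrative example (hypothetical values, not a recommendation):
         * with nb_desc = 512, choosing tx_rs_thresh = 32 and
         * tx_free_thresh = 32 satisfies every constraint above:
         * 32 > 0, 32 < 512 - 2, 32 <= 32, 512 % 32 == 0, 32 < 512 - 3 and
         * 32 + 32 <= 512.
         */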
2660         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2661                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2662         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2663         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2664                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2665         if (tx_conf->tx_rs_thresh > 0)
2666                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2667         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2668                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2669                              "exceed nb_desc. (tx_rs_thresh=%u "
2670                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2671                              (unsigned int)tx_rs_thresh,
2672                              (unsigned int)tx_free_thresh,
2673                              (unsigned int)nb_desc,
2674                              (int)dev->data->port_id,
2675                              (int)queue_idx);
2676                 return -(EINVAL);
2677         }
2678         if (tx_rs_thresh >= (nb_desc - 2)) {
2679                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2680                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2681                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2682                         (int)dev->data->port_id, (int)queue_idx);
2683                 return -(EINVAL);
2684         }
2685         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2686                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2687                         "(tx_rs_thresh=%u port=%d queue=%d)",
2688                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2689                         (int)dev->data->port_id, (int)queue_idx);
2690                 return -(EINVAL);
2691         }
2692         if (tx_free_thresh >= (nb_desc - 3)) {
2693                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2694                              "number of "
2695                              "TX descriptors minus 3. (tx_free_thresh=%u "
2696                              "port=%d queue=%d)",
2697                              (unsigned int)tx_free_thresh,
2698                              (int)dev->data->port_id, (int)queue_idx);
2699                 return -(EINVAL);
2700         }
2701         if (tx_rs_thresh > tx_free_thresh) {
2702                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2703                              "tx_free_thresh. (tx_free_thresh=%u "
2704                              "tx_rs_thresh=%u port=%d queue=%d)",
2705                              (unsigned int)tx_free_thresh,
2706                              (unsigned int)tx_rs_thresh,
2707                              (int)dev->data->port_id,
2708                              (int)queue_idx);
2709                 return -(EINVAL);
2710         }
2711         if ((nb_desc % tx_rs_thresh) != 0) {
2712                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2713                              "number of TX descriptors. (tx_rs_thresh=%u "
2714                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2715                              (int)dev->data->port_id, (int)queue_idx);
2716                 return -(EINVAL);
2717         }
2718
2719         /*
2720          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2721          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2722          * by the NIC and all descriptors are written back after the NIC
2723          * accumulates WTHRESH descriptors.
2724          */
2725         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2726                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2727                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2728                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2729                              (int)dev->data->port_id, (int)queue_idx);
2730                 return -(EINVAL);
2731         }
2732
2733         /* Free memory prior to re-allocation if needed... */
2734         if (dev->data->tx_queues[queue_idx] != NULL) {
2735                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2736                 dev->data->tx_queues[queue_idx] = NULL;
2737         }
2738
2739         /* First allocate the tx queue data structure */
2740         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2741                                  RTE_CACHE_LINE_SIZE, socket_id);
2742         if (txq == NULL)
2743                 return -ENOMEM;
2744
2745         /*
2746          * Allocate TX ring hardware descriptors. A memzone large enough to
2747          * handle the maximum ring size is allocated in order to allow for
2748          * resizing in later calls to the queue setup function.
2749          */
2750         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2751                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2752                         IXGBE_ALIGN, socket_id);
2753         if (tz == NULL) {
2754                 ixgbe_tx_queue_release(txq);
2755                 return -ENOMEM;
2756         }
2757
2758         txq->nb_tx_desc = nb_desc;
2759         txq->tx_rs_thresh = tx_rs_thresh;
2760         txq->tx_free_thresh = tx_free_thresh;
2761         txq->pthresh = tx_conf->tx_thresh.pthresh;
2762         txq->hthresh = tx_conf->tx_thresh.hthresh;
2763         txq->wthresh = tx_conf->tx_thresh.wthresh;
2764         txq->queue_id = queue_idx;
2765         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2766                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2767         txq->port_id = dev->data->port_id;
2768         txq->offloads = offloads;
2769         txq->ops = &def_txq_ops;
2770         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2771 #ifdef RTE_LIB_SECURITY
2772         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2773                         DEV_TX_OFFLOAD_SECURITY);
2774 #endif
2775
2776         /*
2777          * Use VFTDT as the tail register when the device is a virtual function
2778          */
2779         if (hw->mac.type == ixgbe_mac_82599_vf ||
2780             hw->mac.type == ixgbe_mac_X540_vf ||
2781             hw->mac.type == ixgbe_mac_X550_vf ||
2782             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2783             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2784                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2785         else
2786                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2787
2788         txq->tx_ring_phys_addr = tz->iova;
2789         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2790
2791         /* Allocate software ring */
2792         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2793                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2794                                 RTE_CACHE_LINE_SIZE, socket_id);
2795         if (txq->sw_ring == NULL) {
2796                 ixgbe_tx_queue_release(txq);
2797                 return -ENOMEM;
2798         }
2799         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2800                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2801
2802         /* set up vector or scalar TX function as appropriate */
2803         ixgbe_set_tx_function(dev, txq);
2804
2805         txq->ops->reset(txq);
2806
2807         dev->data->tx_queues[queue_idx] = txq;
2808
2809
2810         return 0;
2811 }
2812
2813 /**
2814  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2815  *
2816  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2817  * in the sw_sc_ring is not set to NULL but rather points to the next
2818  * mbuf of this RSC aggregation (that has not been completed yet and still
2819  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2820  * will just free the first "nb_segs" segments of the cluster explicitly by
2821  * calling rte_pktmbuf_free_seg().
2822  *
2823  * @m scattered cluster head
2824  */
2825 static void __rte_cold
2826 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2827 {
2828         uint16_t i, nb_segs = m->nb_segs;
2829         struct rte_mbuf *next_seg;
2830
2831         for (i = 0; i < nb_segs; i++) {
2832                 next_seg = m->next;
2833                 rte_pktmbuf_free_seg(m);
2834                 m = next_seg;
2835         }
2836 }
2837
2838 static void __rte_cold
2839 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2840 {
2841         unsigned i;
2842
2843         /* SSE Vector driver has a different way of releasing mbufs. */
2844         if (rxq->rx_using_sse) {
2845                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2846                 return;
2847         }
2848
2849         if (rxq->sw_ring != NULL) {
2850                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2851                         if (rxq->sw_ring[i].mbuf != NULL) {
2852                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2853                                 rxq->sw_ring[i].mbuf = NULL;
2854                         }
2855                 }
2856                 if (rxq->rx_nb_avail) {
2857                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2858                                 struct rte_mbuf *mb;
2859
2860                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2861                                 rte_pktmbuf_free_seg(mb);
2862                         }
2863                         rxq->rx_nb_avail = 0;
2864                 }
2865         }
2866
2867         if (rxq->sw_sc_ring)
2868                 for (i = 0; i < rxq->nb_rx_desc; i++)
2869                         if (rxq->sw_sc_ring[i].fbuf) {
2870                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2871                                 rxq->sw_sc_ring[i].fbuf = NULL;
2872                         }
2873 }
2874
2875 static void __rte_cold
2876 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2877 {
2878         if (rxq != NULL) {
2879                 ixgbe_rx_queue_release_mbufs(rxq);
2880                 rte_free(rxq->sw_ring);
2881                 rte_free(rxq->sw_sc_ring);
2882                 rte_free(rxq);
2883         }
2884 }
2885
2886 void __rte_cold
2887 ixgbe_dev_rx_queue_release(void *rxq)
2888 {
2889         ixgbe_rx_queue_release(rxq);
2890 }
2891
2892 /*
2893  * Check if Rx Burst Bulk Alloc function can be used.
2894  * Return
2895  *        0: the preconditions are satisfied and the bulk allocation function
2896  *           can be used.
2897  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2898  *           function must be used.
2899  */
2900 static inline int __rte_cold
2901 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2902 {
2903         int ret = 0;
2904
2905         /*
2906          * Make sure the following pre-conditions are satisfied:
2907          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2908          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2909          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2910          * Scattered packets are not supported.  This should be checked
2911          * outside of this function.
2912          */
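        /*
         * Illustrative example (hypothetical values): assuming
         * RTE_PMD_IXGBE_RX_MAX_BURST is 32, a queue configured with
         * nb_rx_desc = 256 and rx_free_thresh = 64 satisfies all three
         * conditions (64 >= 32, 64 < 256, 256 % 64 == 0).
         */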
2913         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2914                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2915                              "rxq->rx_free_thresh=%d, "
2916                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2917                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2918                 ret = -EINVAL;
2919         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2920                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2921                              "rxq->rx_free_thresh=%d, "
2922                              "rxq->nb_rx_desc=%d",
2923                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2924                 ret = -EINVAL;
2925         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2926                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2927                              "rxq->nb_rx_desc=%d, "
2928                              "rxq->rx_free_thresh=%d",
2929                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2930                 ret = -EINVAL;
2931         }
2932
2933         return ret;
2934 }
2935
2936 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2937 static void __rte_cold
2938 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2939 {
2940         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2941         unsigned i;
2942         uint16_t len = rxq->nb_rx_desc;
2943
2944         /*
2945          * By default, the Rx queue setup function allocates enough memory for
2946          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2947          * extra memory at the end of the descriptor ring to be zero'd out.
2948          */
2949         if (adapter->rx_bulk_alloc_allowed)
2950                 /* zero out extra memory */
2951                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2952
2953         /*
2954          * Zero out HW ring memory. Zero out extra memory at the end of
2955          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2956          * reads extra memory as zeros.
2957          */
2958         for (i = 0; i < len; i++) {
2959                 rxq->rx_ring[i] = zeroed_desc;
2960         }
2961
2962         /*
2963          * initialize extra software ring entries. Space for these extra
2964          * entries is always allocated
2965          */
2966         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2967         for (i = rxq->nb_rx_desc; i < len; ++i) {
2968                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2969         }
2970
2971         rxq->rx_nb_avail = 0;
2972         rxq->rx_next_avail = 0;
2973         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2974         rxq->rx_tail = 0;
2975         rxq->nb_rx_hold = 0;
2976         rxq->pkt_first_seg = NULL;
2977         rxq->pkt_last_seg = NULL;
2978
2979 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2980         rxq->rxrearm_start = 0;
2981         rxq->rxrearm_nb = 0;
2982 #endif
2983 }
2984
2985 static int
2986 ixgbe_is_vf(struct rte_eth_dev *dev)
2987 {
2988         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2989
2990         switch (hw->mac.type) {
2991         case ixgbe_mac_82599_vf:
2992         case ixgbe_mac_X540_vf:
2993         case ixgbe_mac_X550_vf:
2994         case ixgbe_mac_X550EM_x_vf:
2995         case ixgbe_mac_X550EM_a_vf:
2996                 return 1;
2997         default:
2998                 return 0;
2999         }
3000 }
3001
3002 uint64_t
3003 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3004 {
3005         uint64_t offloads = 0;
3006         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3007
3008         if (hw->mac.type != ixgbe_mac_82598EB)
3009                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3010
3011         return offloads;
3012 }
3013
3014 uint64_t
3015 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3016 {
3017         uint64_t offloads;
3018         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3019
3020         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
3021                    DEV_RX_OFFLOAD_UDP_CKSUM   |
3022                    DEV_RX_OFFLOAD_TCP_CKSUM   |
3023                    DEV_RX_OFFLOAD_KEEP_CRC    |
3024                    DEV_RX_OFFLOAD_JUMBO_FRAME |
3025                    DEV_RX_OFFLOAD_VLAN_FILTER |
3026                    DEV_RX_OFFLOAD_SCATTER |
3027                    DEV_RX_OFFLOAD_RSS_HASH;
3028
3029         if (hw->mac.type == ixgbe_mac_82598EB)
3030                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3031
3032         if (ixgbe_is_vf(dev) == 0)
3033                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
3034
3035         /*
3036          * RSC is only supported by 82599, x540 and x550 PF devices in a
3037          * non-SR-IOV mode.
3038          */
3039         if ((hw->mac.type == ixgbe_mac_82599EB ||
3040              hw->mac.type == ixgbe_mac_X540 ||
3041              hw->mac.type == ixgbe_mac_X550) &&
3042             !RTE_ETH_DEV_SRIOV(dev).active)
3043                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3044
3045         if (hw->mac.type == ixgbe_mac_82599EB ||
3046             hw->mac.type == ixgbe_mac_X540)
3047                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3048
3049         if (hw->mac.type == ixgbe_mac_X550 ||
3050             hw->mac.type == ixgbe_mac_X550EM_x ||
3051             hw->mac.type == ixgbe_mac_X550EM_a)
3052                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3053
3054 #ifdef RTE_LIB_SECURITY
3055         if (dev->security_ctx)
3056                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3057 #endif
3058
3059         return offloads;
3060 }
3061
3062 int __rte_cold
3063 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3064                          uint16_t queue_idx,
3065                          uint16_t nb_desc,
3066                          unsigned int socket_id,
3067                          const struct rte_eth_rxconf *rx_conf,
3068                          struct rte_mempool *mp)
3069 {
3070         const struct rte_memzone *rz;
3071         struct ixgbe_rx_queue *rxq;
3072         struct ixgbe_hw     *hw;
3073         uint16_t len;
3074         struct ixgbe_adapter *adapter = dev->data->dev_private;
3075         uint64_t offloads;
3076
3077         PMD_INIT_FUNC_TRACE();
3078         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3079
3080         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3081
3082         /*
3083          * Validate number of receive descriptors.
3084          * It must not exceed the hardware maximum and must be a multiple
3085          * of IXGBE_RXD_ALIGN.
3086          */
3087         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3088                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3089                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3090                 return -EINVAL;
3091         }
3092
3093         /* Free memory prior to re-allocation if needed... */
3094         if (dev->data->rx_queues[queue_idx] != NULL) {
3095                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3096                 dev->data->rx_queues[queue_idx] = NULL;
3097         }
3098
3099         /* First allocate the rx queue data structure */
3100         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3101                                  RTE_CACHE_LINE_SIZE, socket_id);
3102         if (rxq == NULL)
3103                 return -ENOMEM;
3104         rxq->mb_pool = mp;
3105         rxq->nb_rx_desc = nb_desc;
3106         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3107         rxq->queue_id = queue_idx;
3108         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3109                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3110         rxq->port_id = dev->data->port_id;
3111         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3112                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3113         else
3114                 rxq->crc_len = 0;
3115         rxq->drop_en = rx_conf->rx_drop_en;
3116         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3117         rxq->offloads = offloads;
3118
3119         /*
3120          * The packet type in the RX descriptor differs between NICs:
3121          * some bits are used by x550 but reserved on other NICs,
3122          * so select the packet type mask per MAC type.
3123          */
3124         if (hw->mac.type == ixgbe_mac_X550 ||
3125             hw->mac.type == ixgbe_mac_X550EM_x ||
3126             hw->mac.type == ixgbe_mac_X550EM_a ||
3127             hw->mac.type == ixgbe_mac_X550_vf ||
3128             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3129             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3130                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3131         else
3132                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3133
3134         /*
3135          * 82599 errata: UDP frames with a zero checksum can be marked as checksum
3136          * errors.
3137          */
3138         if (hw->mac.type == ixgbe_mac_82599EB)
3139                 rxq->rx_udp_csum_zero_err = 1;
3140
3141         /*
3142          * Allocate RX ring hardware descriptors. A memzone large enough to
3143          * handle the maximum ring size is allocated in order to allow for
3144          * resizing in later calls to the queue setup function.
3145          */
3146         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3147                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3148         if (rz == NULL) {
3149                 ixgbe_rx_queue_release(rxq);
3150                 return -ENOMEM;
3151         }
3152
3153         /*
3154          * Zero init all the descriptors in the ring.
3155          */
3156         memset(rz->addr, 0, RX_RING_SZ);
3157
3158         /*
3159          * Use VFRDT/VFRDH for the tail/head registers on virtual functions
3160          */
3161         if (hw->mac.type == ixgbe_mac_82599_vf ||
3162             hw->mac.type == ixgbe_mac_X540_vf ||
3163             hw->mac.type == ixgbe_mac_X550_vf ||
3164             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3165             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3166                 rxq->rdt_reg_addr =
3167                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3168                 rxq->rdh_reg_addr =
3169                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3170         } else {
3171                 rxq->rdt_reg_addr =
3172                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3173                 rxq->rdh_reg_addr =
3174                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3175         }
3176
3177         rxq->rx_ring_phys_addr = rz->iova;
3178         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3179
3180         /*
3181          * Certain constraints must be met in order to use the bulk buffer
3182          * allocation Rx burst function. If any of the Rx queues doesn't meet
3183          * them, the feature is disabled for the whole port.
3184          */
3185         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3186                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3187                                     "preconditions - canceling the feature for "
3188                                     "the whole port[%d]",
3189                              rxq->queue_id, rxq->port_id);
3190                 adapter->rx_bulk_alloc_allowed = false;
3191         }
3192
3193         /*
3194          * Allocate software ring. Allow for space at the end of the
3195          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3196          * function does not access an invalid memory region.
3197          */
3198         len = nb_desc;
3199         if (adapter->rx_bulk_alloc_allowed)
3200                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3201
3202         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3203                                           sizeof(struct ixgbe_rx_entry) * len,
3204                                           RTE_CACHE_LINE_SIZE, socket_id);
3205         if (!rxq->sw_ring) {
3206                 ixgbe_rx_queue_release(rxq);
3207                 return -ENOMEM;
3208         }
3209
3210         /*
3211          * Always allocate even if it's not going to be needed in order to
3212          * simplify the code.
3213          *
3214          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3215          * be requested in ixgbe_dev_rx_init(), which is called later from
3216          * dev_start() flow.
3217          */
3218         rxq->sw_sc_ring =
3219                 rte_zmalloc_socket("rxq->sw_sc_ring",
3220                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3221                                    RTE_CACHE_LINE_SIZE, socket_id);
3222         if (!rxq->sw_sc_ring) {
3223                 ixgbe_rx_queue_release(rxq);
3224                 return -ENOMEM;
3225         }
3226
3227         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3228                             "dma_addr=0x%"PRIx64,
3229                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3230                      rxq->rx_ring_phys_addr);
3231
3232         if (!rte_is_power_of_2(nb_desc)) {
3233                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3234                                     "preconditions - canceling the feature for "
3235                                     "the whole port[%d]",
3236                              rxq->queue_id, rxq->port_id);
3237                 adapter->rx_vec_allowed = false;
3238         } else
3239                 ixgbe_rxq_vec_setup(rxq);
3240
3241         dev->data->rx_queues[queue_idx] = rxq;
3242
3243         ixgbe_reset_rx_queue(adapter, rxq);
3244
3245         return 0;
3246 }
3247
3248 uint32_t
3249 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3250 {
3251 #define IXGBE_RXQ_SCAN_INTERVAL 4
3252         volatile union ixgbe_adv_rx_desc *rxdp;
3253         struct ixgbe_rx_queue *rxq;
3254         uint32_t desc = 0;
3255
3256         rxq = dev->data->rx_queues[rx_queue_id];
3257         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3258
3259         while ((desc < rxq->nb_rx_desc) &&
3260                 (rxdp->wb.upper.status_error &
3261                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3262                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3263                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3264                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3265                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3266                                 desc - rxq->nb_rx_desc]);
3267         }
3268
3269         return desc;
3270 }
3271
3272 int
3273 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3274 {
3275         volatile union ixgbe_adv_rx_desc *rxdp;
3276         struct ixgbe_rx_queue *rxq = rx_queue;
3277         uint32_t desc;
3278
3279         if (unlikely(offset >= rxq->nb_rx_desc))
3280                 return 0;
3281         desc = rxq->rx_tail + offset;
3282         if (desc >= rxq->nb_rx_desc)
3283                 desc -= rxq->nb_rx_desc;
3284
3285         rxdp = &rxq->rx_ring[desc];
3286         return !!(rxdp->wb.upper.status_error &
3287                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3288 }
3289
3290 int
3291 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3292 {
3293         struct ixgbe_rx_queue *rxq = rx_queue;
3294         volatile uint32_t *status;
3295         uint32_t nb_hold, desc;
3296
3297         if (unlikely(offset >= rxq->nb_rx_desc))
3298                 return -EINVAL;
3299
3300 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3301         if (rxq->rx_using_sse)
3302                 nb_hold = rxq->rxrearm_nb;
3303         else
3304 #endif
3305                 nb_hold = rxq->nb_rx_hold;
3306         if (offset >= rxq->nb_rx_desc - nb_hold)
3307                 return RTE_ETH_RX_DESC_UNAVAIL;
3308
3309         desc = rxq->rx_tail + offset;
3310         if (desc >= rxq->nb_rx_desc)
3311                 desc -= rxq->nb_rx_desc;
3312
3313         status = &rxq->rx_ring[desc].wb.upper.status_error;
3314         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3315                 return RTE_ETH_RX_DESC_DONE;
3316
3317         return RTE_ETH_RX_DESC_AVAIL;
3318 }
3319
3320 int
3321 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3322 {
3323         struct ixgbe_tx_queue *txq = tx_queue;
3324         volatile uint32_t *status;
3325         uint32_t desc;
3326
3327         if (unlikely(offset >= txq->nb_tx_desc))
3328                 return -EINVAL;
3329
3330         desc = txq->tx_tail + offset;
3331         /* go to next desc that has the RS bit */
3332         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3333                 txq->tx_rs_thresh;
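        /*
         * Illustrative example (hypothetical values): with tx_rs_thresh = 32,
         * a raw index of 70 is rounded up to 96, the next descriptor for
         * which DD write-back was requested via the RS bit.
         */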
3334         if (desc >= txq->nb_tx_desc) {
3335                 desc -= txq->nb_tx_desc;
3336                 if (desc >= txq->nb_tx_desc)
3337                         desc -= txq->nb_tx_desc;
3338         }
3339
3340         status = &txq->tx_ring[desc].wb.status;
3341         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3342                 return RTE_ETH_TX_DESC_DONE;
3343
3344         return RTE_ETH_TX_DESC_FULL;
3345 }
3346
3347 /*
3348  * Set up link loopback for X540/X550 mode Tx->Rx.
3349  */
3350 static inline void __rte_cold
3351 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3352 {
3353         uint32_t macc;
3354         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3355
3356         PMD_INIT_FUNC_TRACE();
3357
3358         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3359                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3360         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3361
3362         if (enable) {
3363                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3364                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3365                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3366                 macc |= IXGBE_MACC_FLU;
3367         } else {
3368                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3369                 macc &= ~IXGBE_MACC_FLU;
3370         }
3371
3372         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3373                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3374
3375         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3376 }
3377
3378 void __rte_cold
3379 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3380 {
3381         unsigned i;
3382         struct ixgbe_adapter *adapter = dev->data->dev_private;
3383         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3384
3385         PMD_INIT_FUNC_TRACE();
3386
3387         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3388                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3389
3390                 if (txq != NULL) {
3391                         txq->ops->release_mbufs(txq);
3392                         txq->ops->reset(txq);
3393                 }
3394         }
3395
3396         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3397                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3398
3399                 if (rxq != NULL) {
3400                         ixgbe_rx_queue_release_mbufs(rxq);
3401                         ixgbe_reset_rx_queue(adapter, rxq);
3402                 }
3403         }
3404         /* If loopback mode was enabled, reconfigure the link accordingly */
3405         if (dev->data->dev_conf.lpbk_mode != 0) {
3406                 if (hw->mac.type == ixgbe_mac_X540 ||
3407                      hw->mac.type == ixgbe_mac_X550 ||
3408                      hw->mac.type == ixgbe_mac_X550EM_x ||
3409                      hw->mac.type == ixgbe_mac_X550EM_a)
3410                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3411         }
3412 }
3413
3414 void
3415 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3416 {
3417         unsigned i;
3418
3419         PMD_INIT_FUNC_TRACE();
3420
3421         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3422                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3423                 dev->data->rx_queues[i] = NULL;
3424                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3425         }
3426         dev->data->nb_rx_queues = 0;
3427
3428         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3429                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3430                 dev->data->tx_queues[i] = NULL;
3431                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3432         }
3433         dev->data->nb_tx_queues = 0;
3434 }
3435
3436 /*********************************************************************
3437  *
3438  *  Device RX/TX init functions
3439  *
3440  **********************************************************************/
3441
3442 /**
3443  * Receive Side Scaling (RSS)
3444  * See section 7.1.2.8 in the following document:
3445  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3446  *
3447  * Principles:
3448  * The source and destination IP addresses of the IP header and the source
3449  * and destination ports of TCP/UDP headers, if any, of received packets are
3450  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3451  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3452  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3453  * RSS output index, which is used as the index of the RX queue in which
3454  * to store the received packets.
3455  * The following output is supplied in the RX write-back descriptor:
3456  *     - 32-bit result of the Microsoft RSS hash function,
3457  *     - 4-bit RSS type field.
3458  */
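/*
 * Small worked example of the scheme above (illustrative values only):
 * for a computed hash of 0x1234ABCD, the RETA index is
 * 0x1234ABCD & 0x7F = 0x4D (77), and the value stored in RETA[77] selects
 * the RX queue that receives the packet.
 */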
3459
3460 /*
3461  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3462  * Used as the default key.
3463  */
3464 static uint8_t rss_intel_key[40] = {
3465         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3466         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3467         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3468         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3469         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3470 };
3471
3472 static void
3473 ixgbe_rss_disable(struct rte_eth_dev *dev)
3474 {
3475         struct ixgbe_hw *hw;
3476         uint32_t mrqc;
3477         uint32_t mrqc_reg;
3478
3479         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3480         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3481         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3482         mrqc &= ~IXGBE_MRQC_RSSEN;
3483         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3484 }
3485
3486 static void
3487 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3488 {
3489         uint8_t  *hash_key;
3490         uint32_t mrqc;
3491         uint32_t rss_key;
3492         uint64_t rss_hf;
3493         uint16_t i;
3494         uint32_t mrqc_reg;
3495         uint32_t rssrk_reg;
3496
3497         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3498         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3499
3500         hash_key = rss_conf->rss_key;
3501         if (hash_key != NULL) {
3502                 /* Fill in RSS hash key */
3503                 for (i = 0; i < 10; i++) {
3504                         rss_key  = hash_key[(i * 4)];
3505                         rss_key |= hash_key[(i * 4) + 1] << 8;
3506                         rss_key |= hash_key[(i * 4) + 2] << 16;
3507                         rss_key |= hash_key[(i * 4) + 3] << 24;
3508                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3509                 }
3510         }
3511
3512         /* Set configured hashing protocols in MRQC register */
3513         rss_hf = rss_conf->rss_hf;
3514         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3515         if (rss_hf & ETH_RSS_IPV4)
3516                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3517         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3518                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3519         if (rss_hf & ETH_RSS_IPV6)
3520                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3521         if (rss_hf & ETH_RSS_IPV6_EX)
3522                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3523         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3524                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3525         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3526                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3527         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3528                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3529         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3530                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3531         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3532                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3533         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3534 }
3535
3536 int
3537 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3538                           struct rte_eth_rss_conf *rss_conf)
3539 {
3540         struct ixgbe_hw *hw;
3541         uint32_t mrqc;
3542         uint64_t rss_hf;
3543         uint32_t mrqc_reg;
3544
3545         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3546
3547         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3548                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3549                         "NIC.");
3550                 return -ENOTSUP;
3551         }
3552         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3553
3554         /*
3555          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3556          *     "RSS enabling cannot be done dynamically while it must be
3557          *      preceded by a software reset"
3558          * Before changing anything, first check that the update RSS operation
3559          * does not attempt to disable RSS, if RSS was enabled at
3560          * initialization time, or does not attempt to enable RSS, if RSS was
3561          * disabled at initialization time.
3562          */
3563         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3564         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3565         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3566                 if (rss_hf != 0) /* Enable RSS */
3567                         return -(EINVAL);
3568                 return 0; /* Nothing to do */
3569         }
3570         /* RSS enabled */
3571         if (rss_hf == 0) /* Disable RSS */
3572                 return -(EINVAL);
3573         ixgbe_hw_rss_hash_set(hw, rss_conf);
3574         return 0;
3575 }
3576
3577 int
3578 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3579                             struct rte_eth_rss_conf *rss_conf)
3580 {
3581         struct ixgbe_hw *hw;
3582         uint8_t *hash_key;
3583         uint32_t mrqc;
3584         uint32_t rss_key;
3585         uint64_t rss_hf;
3586         uint16_t i;
3587         uint32_t mrqc_reg;
3588         uint32_t rssrk_reg;
3589
3590         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3591         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3592         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3593         hash_key = rss_conf->rss_key;
3594         if (hash_key != NULL) {
3595                 /* Return RSS hash key */
3596                 for (i = 0; i < 10; i++) {
3597                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3598                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3599                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3600                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3601                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3602                 }
3603         }
3604
3605         /* Get RSS functions configured in MRQC register */
3606         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3607         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3608                 rss_conf->rss_hf = 0;
3609                 return 0;
3610         }
3611         rss_hf = 0;
3612         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3613                 rss_hf |= ETH_RSS_IPV4;
3614         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3615                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3616         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3617                 rss_hf |= ETH_RSS_IPV6;
3618         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3619                 rss_hf |= ETH_RSS_IPV6_EX;
3620         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3621                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3622         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3623                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3624         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3625                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3626         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3627                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3628         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3629                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3630         rss_conf->rss_hf = rss_hf;
3631         return 0;
3632 }
3633
3634 static void
3635 ixgbe_rss_configure(struct rte_eth_dev *dev)
3636 {
3637         struct rte_eth_rss_conf rss_conf;
3638         struct ixgbe_adapter *adapter;
3639         struct ixgbe_hw *hw;
3640         uint32_t reta;
3641         uint16_t i;
3642         uint16_t j;
3643         uint16_t sp_reta_size;
3644         uint32_t reta_reg;
3645
3646         PMD_INIT_FUNC_TRACE();
3647         adapter = dev->data->dev_private;
3648         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3649
3650         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3651
3652         /*
3653          * Fill in redirection table
3654          * The byte-swap is needed because NIC registers are in
3655          * little-endian order.
3656          */
3657         if (adapter->rss_reta_updated == 0) {
3658                 reta = 0;
3659                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3660                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3661
3662                         if (j == dev->data->nb_rx_queues)
3663                                 j = 0;
3664                         reta = (reta << 8) | j;
3665                         if ((i & 3) == 3)
3666                                 IXGBE_WRITE_REG(hw, reta_reg,
3667                                                 rte_bswap32(reta));
3668                 }
3669         }
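        /*
         * Illustrative trace of the loop above (hypothetical values): with
         * nb_rx_queues = 4, the first four iterations build
         * reta = 0x00010203 and rte_bswap32() stores it as 0x03020100, so
         * that entry 0 lands in the least significant byte of the
         * little-endian register.
         */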
3670
3671         /*
3672          * Configure the RSS key and the RSS protocols used to compute
3673          * the RSS hash of input packets.
3674          */
3675         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3676         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3677                 ixgbe_rss_disable(dev);
3678                 return;
3679         }
3680         if (rss_conf.rss_key == NULL)
3681                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3682         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3683 }
3684
3685 #define NUM_VFTA_REGISTERS 128
3686 #define NIC_RX_BUFFER_SIZE 0x200
3687 #define X550_RX_BUFFER_SIZE 0x180
3688
3689 static void
3690 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3691 {
3692         struct rte_eth_vmdq_dcb_conf *cfg;
3693         struct ixgbe_hw *hw;
3694         enum rte_eth_nb_pools num_pools;
3695         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3696         uint16_t pbsize;
3697         uint8_t nb_tcs; /* number of traffic classes */
3698         int i;
3699
3700         PMD_INIT_FUNC_TRACE();
3701         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3702         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3703         num_pools = cfg->nb_queue_pools;
3704         /* Check we have a valid number of pools */
3705         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3706                 ixgbe_rss_disable(dev);
3707                 return;
3708         }
3709         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3710         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3711
3712         /*
3713          * RXPBSIZE
3714          * split rx buffer up into sections, each for 1 traffic class
3715          */
3716         switch (hw->mac.type) {
3717         case ixgbe_mac_X550:
3718         case ixgbe_mac_X550EM_x:
3719         case ixgbe_mac_X550EM_a:
3720                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3721                 break;
3722         default:
3723                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3724                 break;
3725         }
3726         for (i = 0; i < nb_tcs; i++) {
3727                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3728
3729                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3730                 /* clear 10 bits. */
3731                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3732                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3733         }
3734         /* zero alloc all unused TCs */
3735         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3736                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3737
3738                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3739                 /* clear 10 bits. */
3740                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3741         }
3742
3743         /* MRQC: enable vmdq and dcb */
3744         mrqc = (num_pools == ETH_16_POOLS) ?
3745                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3746         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3747
3748         /* PFVTCTL: turn on virtualisation and set the default pool */
3749         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3750         if (cfg->enable_default_pool) {
3751                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3752         } else {
3753                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3754         }
3755
3756         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3757
3758         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3759         queue_mapping = 0;
3760         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3761                 /*
3762                  * mapping is done with 3 bits per priority,
3763                  * so shift by i*3 each time
3764                  */
3765                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3766
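        /*
         * Illustrative example (hypothetical dcb_tc mapping): with
         * dcb_tc = {0, 0, 1, 1, 2, 2, 3, 3}, the loop above yields
         * queue_mapping = 0x006D2240, e.g. user priority 7 maps to TC 3 in
         * bits 23:21.
         */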
3767         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3768
3769         /* RTRPCS: DCB related */
3770         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3771
3772         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3773         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3774         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3775         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3776
3777         /* VFTA - enable all vlan filters */
3778         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3779                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3780         }
3781
3782         /* VFRE: pool enabling for receive - 16 or 32 */
3783         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3784                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3785
3786         /*
3787          * MPSAR - allow pools to read specific mac addresses
3788          * In this case, all pools should be able to read from mac addr 0
3789          */
3790         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3791         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3792
3793         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3794         for (i = 0; i < cfg->nb_pool_maps; i++) {
3795                 /* set vlan id in VF register and set the valid bit */
3796                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3797                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3798                 /*
3799                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3800                  * pools, we only need to use the first half of the register
3801                  * i.e. bits 0-31
3802                  */
3803                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3804         }
3805 }
3806
3807 /**
3808  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3809  * @dev: pointer to eth_dev structure
3810  * @dcb_config: pointer to ixgbe_dcb_config structure
3811  */
3812 static void
3813 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3814                        struct ixgbe_dcb_config *dcb_config)
3815 {
3816         uint32_t reg;
3817         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3818
3819         PMD_INIT_FUNC_TRACE();
3820         if (hw->mac.type != ixgbe_mac_82598EB) {
3821                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3822                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3823                 reg |= IXGBE_RTTDCS_ARBDIS;
3824                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3825
3826                 /* Enable DCB for Tx with 8 TCs */
3827                 if (dcb_config->num_tcs.pg_tcs == 8) {
3828                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3829                 } else {
3830                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3831                 }
3832                 if (dcb_config->vt_mode)
3833                         reg |= IXGBE_MTQC_VT_ENA;
3834                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3835
3836                 /* Enable the Tx desc arbiter */
3837                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3838                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3839                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3840
3841                 /* Enable Security TX Buffer IFG for DCB */
3842                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3843                 reg |= IXGBE_SECTX_DCB;
3844                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3845         }
3846 }
3847
3848 /**
3849  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3850  * @dev: pointer to rte_eth_dev structure
3851  * @dcb_config: pointer to ixgbe_dcb_config structure
3852  */
3853 static void
3854 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3855                         struct ixgbe_dcb_config *dcb_config)
3856 {
3857         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3858                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3859         struct ixgbe_hw *hw =
3860                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3861
3862         PMD_INIT_FUNC_TRACE();
3863         if (hw->mac.type != ixgbe_mac_82598EB)
3864                 /*PF VF Transmit Enable*/
3865                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3866                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3867
3868         /*Configure general DCB TX parameters*/
3869         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3870 }
3871
3872 static void
3873 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3874                         struct ixgbe_dcb_config *dcb_config)
3875 {
3876         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3877                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3878         struct ixgbe_dcb_tc_config *tc;
3879         uint8_t i, j;
3880
3881         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3882         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3883                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3884                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3885         } else {
3886                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3887                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3888         }
3889
3890         /* Initialize User Priority to Traffic Class mapping */
3891         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3892                 tc = &dcb_config->tc_config[j];
3893                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3894         }
3895
3896         /* User Priority to Traffic Class mapping */
3897         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3898                 j = vmdq_rx_conf->dcb_tc[i];
3899                 tc = &dcb_config->tc_config[j];
3900                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3901                                                 (uint8_t)(1 << i);
3902         }
3903 }
3904
3905 static void
3906 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3907                         struct ixgbe_dcb_config *dcb_config)
3908 {
3909         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3910                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3911         struct ixgbe_dcb_tc_config *tc;
3912         uint8_t i, j;
3913
3914         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3915         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3916                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3917                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3918         } else {
3919                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3920                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3921         }
3922
3923         /* Initialize User Priority to Traffic Class mapping */
3924         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3925                 tc = &dcb_config->tc_config[j];
3926                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3927         }
3928
3929         /* User Priority to Traffic Class mapping */
3930         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3931                 j = vmdq_tx_conf->dcb_tc[i];
3932                 tc = &dcb_config->tc_config[j];
3933                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3934                                                 (uint8_t)(1 << i);
3935         }
3936 }
3937
3938 static void
3939 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3940                 struct ixgbe_dcb_config *dcb_config)
3941 {
3942         struct rte_eth_dcb_rx_conf *rx_conf =
3943                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3944         struct ixgbe_dcb_tc_config *tc;
3945         uint8_t i, j;
3946
3947         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3948         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3949
3950         /* Initialize User Priority to Traffic Class mapping */
3951         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3952                 tc = &dcb_config->tc_config[j];
3953                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3954         }
3955
3956         /* User Priority to Traffic Class mapping */
3957         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3958                 j = rx_conf->dcb_tc[i];
3959                 tc = &dcb_config->tc_config[j];
3960                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3961                                                 (uint8_t)(1 << i);
3962         }
3963 }
3964
3965 static void
3966 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3967                 struct ixgbe_dcb_config *dcb_config)
3968 {
3969         struct rte_eth_dcb_tx_conf *tx_conf =
3970                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3971         struct ixgbe_dcb_tc_config *tc;
3972         uint8_t i, j;
3973
3974         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3975         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3976
3977         /* Initialize User Priority to Traffic Class mapping */
3978         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3979                 tc = &dcb_config->tc_config[j];
3980                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3981         }
3982
3983         /* User Priority to Traffic Class mapping */
3984         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3985                 j = tx_conf->dcb_tc[i];
3986                 tc = &dcb_config->tc_config[j];
3987                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3988                                                 (uint8_t)(1 << i);
3989         }
3990 }
3991
3992 /**
3993  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3994  * @dev: pointer to eth_dev structure
3995  * @dcb_config: pointer to ixgbe_dcb_config structure
3996  */
3997 static void
3998 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3999                        struct ixgbe_dcb_config *dcb_config)
4000 {
4001         uint32_t reg;
4002         uint32_t vlanctrl;
4003         uint8_t i;
4004         uint32_t q;
4005         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4006
4007         PMD_INIT_FUNC_TRACE();
4008         /*
4009          * Disable the arbiter before changing parameters
4010          * (always enable recycle mode; WSP)
4011          */
4012         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4013         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4014
4015         if (hw->mac.type != ixgbe_mac_82598EB) {
4016                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
4017                 if (dcb_config->num_tcs.pg_tcs == 4) {
4018                         if (dcb_config->vt_mode)
4019                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4020                                         IXGBE_MRQC_VMDQRT4TCEN;
4021                         else {
4022                                 /* whether the mode is DCB or DCB_RSS, just
4023                                  * set MRQE to RTRSS4TCEN; RSS itself is
4024                                  * controlled by the RSS_FIELD bits
4025                                  */
4026                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4027                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4028                                         IXGBE_MRQC_RTRSS4TCEN;
4029                         }
4030                 }
4031                 if (dcb_config->num_tcs.pg_tcs == 8) {
4032                         if (dcb_config->vt_mode)
4033                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4034                                         IXGBE_MRQC_VMDQRT8TCEN;
4035                         else {
4036                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4037                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4038                                         IXGBE_MRQC_RTRSS8TCEN;
4039                         }
4040                 }
4041
4042                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4043
4044                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4045                         /* Disable drop for all queues in VMDQ mode*/
4046                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4047                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4048                                                 (IXGBE_QDE_WRITE |
4049                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4050                 } else {
4051                         /* Enable drop for all queues in SRIOV mode */
4052                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4053                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4054                                                 (IXGBE_QDE_WRITE |
4055                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4056                                                  IXGBE_QDE_ENABLE));
4057                 }
4058         }
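     /*
      * Note on the QDE writes above (assuming the usual QDE semantics):
      * each write programs the drop-enable bit of the single queue selected
      * by the IDX field, with IXGBE_QDE_WRITE committing the value, so the
      * loops touch one queue per iteration.
      */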
4059
4060         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4061         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4062         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4063         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4064
4065         /* VFTA - enable all vlan filters */
4066         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4067                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4068         }
4069
4070         /*
4071          * Configure Rx packet plane (recycle mode; WSP) and
4072          * enable arbiter
4073          */
4074         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4075         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4076 }
4077
4078 static void
4079 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4080                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4081 {
4082         switch (hw->mac.type) {
4083         case ixgbe_mac_82598EB:
4084                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4085                 break;
4086         case ixgbe_mac_82599EB:
4087         case ixgbe_mac_X540:
4088         case ixgbe_mac_X550:
4089         case ixgbe_mac_X550EM_x:
4090         case ixgbe_mac_X550EM_a:
4091                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4092                                                   tsa, map);
4093                 break;
4094         default:
4095                 break;
4096         }
4097 }
4098
4099 static void
4100 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4101                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4102 {
4103         switch (hw->mac.type) {
4104         case ixgbe_mac_82598EB:
4105                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4106                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4107                 break;
4108         case ixgbe_mac_82599EB:
4109         case ixgbe_mac_X540:
4110         case ixgbe_mac_X550:
4111         case ixgbe_mac_X550EM_x:
4112         case ixgbe_mac_X550EM_a:
4113                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4114                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4115                 break;
4116         default:
4117                 break;
4118         }
4119 }
4120
4121 #define DCB_RX_CONFIG  1
4122 #define DCB_TX_CONFIG  1
4123 #define DCB_TX_PB      1024
4124 /**
4125  * ixgbe_dcb_hw_configure - Enable DCB and configure
4126  * general DCB parameters, in both VT and non-VT mode
4127  * @dev: pointer to rte_eth_dev structure
4128  * @dcb_config: pointer to ixgbe_dcb_config structure
4129  */
4130 static int
4131 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4132                         struct ixgbe_dcb_config *dcb_config)
4133 {
4134         int     ret = 0;
4135         uint8_t i, pfc_en, nb_tcs;
4136         uint16_t pbsize, rx_buffer_size;
4137         uint8_t config_dcb_rx = 0;
4138         uint8_t config_dcb_tx = 0;
4139         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4140         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4141         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4142         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4143         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4144         struct ixgbe_dcb_tc_config *tc;
4145         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4146                 RTE_ETHER_CRC_LEN;
4147         struct ixgbe_hw *hw =
4148                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4149         struct ixgbe_bw_conf *bw_conf =
4150                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4151
4152         switch (dev->data->dev_conf.rxmode.mq_mode) {
4153         case ETH_MQ_RX_VMDQ_DCB:
4154                 dcb_config->vt_mode = true;
4155                 if (hw->mac.type != ixgbe_mac_82598EB) {
4156                         config_dcb_rx = DCB_RX_CONFIG;
4157                         /*
4158                          * get DCB and VT RX configuration parameters
4159                          * from rte_eth_conf
4160                          */
4161                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4162                         /*Configure general VMDQ and DCB RX parameters*/
4163                         ixgbe_vmdq_dcb_configure(dev);
4164                 }
4165                 break;
4166         case ETH_MQ_RX_DCB:
4167         case ETH_MQ_RX_DCB_RSS:
4168                 dcb_config->vt_mode = false;
4169                 config_dcb_rx = DCB_RX_CONFIG;
4170                 /* Get DCB RX configuration parameters from rte_eth_conf */
4171                 ixgbe_dcb_rx_config(dev, dcb_config);
4172                 /*Configure general DCB RX parameters*/
4173                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4174                 break;
4175         default:
4176                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4177                 break;
4178         }
4179         switch (dev->data->dev_conf.txmode.mq_mode) {
4180         case ETH_MQ_TX_VMDQ_DCB:
4181                 dcb_config->vt_mode = true;
4182                 config_dcb_tx = DCB_TX_CONFIG;
4183                 /* get DCB and VT TX configuration parameters
4184                  * from rte_eth_conf
4185                  */
4186                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4187                 /*Configure general VMDQ and DCB TX parameters*/
4188                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4189                 break;
4190
4191         case ETH_MQ_TX_DCB:
4192                 dcb_config->vt_mode = false;
4193                 config_dcb_tx = DCB_TX_CONFIG;
4194                 /*get DCB TX configuration parameters from rte_eth_conf*/
4195                 ixgbe_dcb_tx_config(dev, dcb_config);
4196                 /*Configure general DCB TX parameters*/
4197                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4198                 break;
4199         default:
4200                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4201                 break;
4202         }
4203
4204         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4205         /* Unpack map */
4206         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4207         if (nb_tcs == ETH_4_TCS) {
4208                 /* Avoid mapping unconfigured priorities to TC0 */
4209                 uint8_t j = 4;
4210                 uint8_t mask = 0xFF;
4211
4212                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4213                         mask = (uint8_t)(mask & (~(1 << map[i])));
4214                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4215                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4216                                 map[j++] = i;
4217                         mask >>= 1;
4218                 }
4219                 /* Re-configure 4 TCs BW */
4220                 for (i = 0; i < nb_tcs; i++) {
4221                         tc = &dcb_config->tc_config[i];
4222                         if (bw_conf->tc_num != nb_tcs)
4223                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4224                                         (uint8_t)(100 / nb_tcs);
4225                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4226                                                 (uint8_t)(100 / nb_tcs);
4227                 }
4228                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4229                         tc = &dcb_config->tc_config[i];
4230                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4231                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4232                 }
4233         } else {
4234                 /* Re-configure 8 TCs BW */
4235                 for (i = 0; i < nb_tcs; i++) {
4236                         tc = &dcb_config->tc_config[i];
4237                         if (bw_conf->tc_num != nb_tcs)
4238                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4239                                         (uint8_t)(100 / nb_tcs + (i & 1));
4240                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4241                                 (uint8_t)(100 / nb_tcs + (i & 1));
4242                 }
4243         }
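     /*
      * Note: with 8 TCs the integer split is 100 / 8 = 12, and the (i & 1)
      * term adds 1 percent to the odd-numbered TCs, so the eight shares
      * (12, 13, 12, 13, ...) sum to exactly 100.
      */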
4244
4245         switch (hw->mac.type) {
4246         case ixgbe_mac_X550:
4247         case ixgbe_mac_X550EM_x:
4248         case ixgbe_mac_X550EM_a:
4249                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4250                 break;
4251         default:
4252                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4253                 break;
4254         }
4255
4256         if (config_dcb_rx) {
4257                 /* Set RX buffer size */
4258                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4259                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
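             /*
              * Illustrative example, assuming the usual NIC_RX_BUFFER_SIZE
              * of 0x200 (512 KB) and IXGBE_RXPBSIZE_SHIFT of 10: with 4 TCs
              * each enabled TC gets a 128 KB packet buffer, encoded as 128
              * in the KB-granular RXPBSIZE field.
              */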
4260
4261                 for (i = 0; i < nb_tcs; i++) {
4262                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4263                 }
4264                 /* zero alloc all unused TCs */
4265                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4266                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4267                 }
4268         }
4269         if (config_dcb_tx) {
4270                 /* Only an equally distributed Tx packet
4271                  * buffer strategy is supported.
4272                  */
4273                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4274                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
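             /*
              * Illustrative example, assuming the base driver's
              * IXGBE_TXPBSIZE_MAX of 160 KB and IXGBE_TXPKT_SIZE_MAX of
              * 10 (KB): with 8 TCs, txpktsize is 20 KB per TC and
              * txpbthresh is 20 - 10 = 10 (KB).
              */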
4275
4276                 for (i = 0; i < nb_tcs; i++) {
4277                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4278                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4279                 }
4280                 /* Clear unused TCs, if any, to zero buffer size*/
4281                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4282                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4283                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4284                 }
4285         }
4286
4287         /*Calculates traffic class credits*/
4288         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4289                                 IXGBE_DCB_TX_CONFIG);
4290         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4291                                 IXGBE_DCB_RX_CONFIG);
4292
4293         if (config_dcb_rx) {
4294                 /* Unpack CEE standard containers */
4295                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4296                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4297                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4298                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4299                 /* Configure PG(ETS) RX */
4300                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4301         }
4302
4303         if (config_dcb_tx) {
4304                 /* Unpack CEE standard containers */
4305                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4306                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4307                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4308                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4309                 /* Configure PG(ETS) TX */
4310                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4311         }
4312
4313         /*Configure queue statistics registers*/
4314         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4315
4316         /* Check if the PFC is supported */
4317         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4318                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4319                 for (i = 0; i < nb_tcs; i++) {
4320                         /*
4321                          * high_water is 3/4 and low_water 1/4 of the per-TC
4322                          * buffer (e.g. 512 KB / 8 TCs: 48 and 16 KB).
4323                          */
4324                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4325                         hw->fc.low_water[i] = pbsize / 4;
4326                         /* Enable pfc for this TC */
4327                         tc = &dcb_config->tc_config[i];
4328                         tc->pfc = ixgbe_dcb_pfc_enabled;
4329                 }
4330                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4331                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4332                         pfc_en &= 0x0F;
4333                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4334         }
4335
4336         return ret;
4337 }
4338
4339 /**
4340  * ixgbe_configure_dcb - Configure DCB  Hardware
4341  * @dev: pointer to rte_eth_dev
4342  */
4343 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4344 {
4345         struct ixgbe_dcb_config *dcb_cfg =
4346                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4347         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4348
4349         PMD_INIT_FUNC_TRACE();
4350
4351         /* check that mq_mode supports DCB */
4352         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4353             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4354             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4355                 return;
4356
4357         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4358                 return;
4359
4360         /** Configure DCB hardware **/
4361         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4362 }
4363
4364 /*
4365  * VMDq is only supported on 10 GbE NICs.
4366  */
4367 static void
4368 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4369 {
4370         struct rte_eth_vmdq_rx_conf *cfg;
4371         struct ixgbe_hw *hw;
4372         enum rte_eth_nb_pools num_pools;
4373         uint32_t mrqc, vt_ctl, vlanctrl;
4374         uint32_t vmolr = 0;
4375         int i;
4376
4377         PMD_INIT_FUNC_TRACE();
4378         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4379         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4380         num_pools = cfg->nb_queue_pools;
4381
4382         ixgbe_rss_disable(dev);
4383
4384         /* MRQC: enable vmdq */
4385         mrqc = IXGBE_MRQC_VMDQEN;
4386         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4387
4388         /* PFVTCTL: turn on virtualisation and set the default pool */
4389         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4390         if (cfg->enable_default_pool)
4391                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4392         else
4393                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4394
4395         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4396
4397         for (i = 0; i < (int)num_pools; i++) {
4398                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4399                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4400         }
4401
4402         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4403         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4404         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4405         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4406
4407         /* VFTA - enable all vlan filters */
4408         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4409                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4410
4411         /* VFRE: pool enabling for receive - 64 */
4412         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4413         if (num_pools == ETH_64_POOLS)
4414                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4415
4416         /*
4417          * MPSAR - allow pools to read specific mac addresses
4418          * In this case, all pools should be able to read from mac addr 0
4419          */
4420         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4421         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4422
4423         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4424         for (i = 0; i < cfg->nb_pool_maps; i++) {
4425                 /* set vlan id in VF register and set the valid bit */
4426                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4427                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4428                 /*
4429                  * Put the allowed pools in the VLVFB registers. Each filter
4430                  * has two 32-bit VLVFB registers: write the lower one when
4431                  * only pools 0-31 are used, otherwise the upper one (32-63).
4432                  */
4433                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4434                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4435                                         (cfg->pool_map[i].pools & UINT32_MAX));
4436                 else
4437                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4438                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4439
4440         }
4441
4442         /* PFDTXGSWC (PF DMA Tx General Switch Control): enable VMDq loopback */
4443         if (cfg->enable_loop_back) {
4444                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4445                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4446                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4447         }
4448
4449         IXGBE_WRITE_FLUSH(hw);
4450 }
4451
4452 /*
4453  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4454  * @hw: pointer to hardware structure
4455  */
4456 static void
4457 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4458 {
4459         uint32_t reg;
4460         uint32_t q;
4461
4462         PMD_INIT_FUNC_TRACE();
4463         /*PF VF Transmit Enable*/
4464         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4465         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4466
4467         /* Disable the Tx desc arbiter so that MTQC can be changed */
4468         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4469         reg |= IXGBE_RTTDCS_ARBDIS;
4470         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4471
4472         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4473         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4474
4475         /* Disable drop for all queues */
4476         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4477                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4478                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4479
4480         /* Enable the Tx desc arbiter */
4481         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4482         reg &= ~IXGBE_RTTDCS_ARBDIS;
4483         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4484
4485         IXGBE_WRITE_FLUSH(hw);
4486 }
4487
4488 static int __rte_cold
4489 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4490 {
4491         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4492         uint64_t dma_addr;
4493         unsigned int i;
4494
4495         /* Initialize software ring entries */
4496         for (i = 0; i < rxq->nb_rx_desc; i++) {
4497                 volatile union ixgbe_adv_rx_desc *rxd;
4498                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4499
4500                 if (mbuf == NULL) {
4501                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4502                                      (unsigned) rxq->queue_id);
4503                         return -ENOMEM;
4504                 }
4505
4506                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4507                 mbuf->port = rxq->port_id;
4508
4509                 dma_addr =
4510                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4511                 rxd = &rxq->rx_ring[i];
4512                 rxd->read.hdr_addr = 0;
4513                 rxd->read.pkt_addr = dma_addr;
4514                 rxe[i].mbuf = mbuf;
4515         }
4516
4517         return 0;
4518 }
4519
4520 static int
4521 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4522 {
4523         struct ixgbe_hw *hw;
4524         uint32_t mrqc;
4525
4526         ixgbe_rss_configure(dev);
4527
4528         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4529
4530         /* MRQC: enable VF RSS */
4531         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4532         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4533         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4534         case ETH_64_POOLS:
4535                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4536                 break;
4537
4538         case ETH_32_POOLS:
4539                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4540                 break;
4541
4542         default:
4543                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4544                 return -EINVAL;
4545         }
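     /*
      * Note (assuming 82599-class queue layouts): 64 pools leave 2 RSS
      * queues per pool, 32 pools leave 4, out of 128 Rx queues in total.
      */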
4546
4547         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4548
4549         return 0;
4550 }
4551
4552 static int
4553 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4554 {
4555         struct ixgbe_hw *hw =
4556                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4557
4558         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4559         case ETH_64_POOLS:
4560                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4561                         IXGBE_MRQC_VMDQEN);
4562                 break;
4563
4564         case ETH_32_POOLS:
4565                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4566                         IXGBE_MRQC_VMDQRT4TCEN);
4567                 break;
4568
4569         case ETH_16_POOLS:
4570                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4571                         IXGBE_MRQC_VMDQRT8TCEN);
4572                 break;
4573         default:
4574                 PMD_INIT_LOG(ERR,
4575                         "invalid pool number in IOV mode");
4576                 break;
4577         }
4578         return 0;
4579 }
4580
4581 static int
4582 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4583 {
4584         struct ixgbe_hw *hw =
4585                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4586
4587         if (hw->mac.type == ixgbe_mac_82598EB)
4588                 return 0;
4589
4590         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4591                 /*
4592                  * SRIOV inactive scheme
4593                  * any DCB/RSS w/o VMDq multi-queue setting
4594                  */
4595                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4596                 case ETH_MQ_RX_RSS:
4597                 case ETH_MQ_RX_DCB_RSS:
4598                 case ETH_MQ_RX_VMDQ_RSS:
4599                         ixgbe_rss_configure(dev);
4600                         break;
4601
4602                 case ETH_MQ_RX_VMDQ_DCB:
4603                         ixgbe_vmdq_dcb_configure(dev);
4604                         break;
4605
4606                 case ETH_MQ_RX_VMDQ_ONLY:
4607                         ixgbe_vmdq_rx_hw_configure(dev);
4608                         break;
4609
4610                 case ETH_MQ_RX_NONE:
4611                 default:
4612                         /* if mq_mode is none, disable rss mode.*/
4613                         ixgbe_rss_disable(dev);
4614                         break;
4615                 }
4616         } else {
4617                 /* SRIOV active scheme
4618                  * Support RSS together with SRIOV.
4619                  */
4620                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4621                 case ETH_MQ_RX_RSS:
4622                 case ETH_MQ_RX_VMDQ_RSS:
4623                         ixgbe_config_vf_rss(dev);
4624                         break;
4625                 case ETH_MQ_RX_VMDQ_DCB:
4626                 case ETH_MQ_RX_DCB:
4627                 /* In SRIOV, the configuration is the same as VMDq case */
4628                         ixgbe_vmdq_dcb_configure(dev);
4629                         break;
4630                 /* DCB/RSS together with SRIOV is not supported */
4631                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4632                 case ETH_MQ_RX_DCB_RSS:
4633                         PMD_INIT_LOG(ERR,
4634                                 "Could not support DCB/RSS with VMDq & SRIOV");
4635                         return -1;
4636                 default:
4637                         ixgbe_config_vf_default(dev);
4638                         break;
4639                 }
4640         }
4641
4642         return 0;
4643 }
4644
4645 static int
4646 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4647 {
4648         struct ixgbe_hw *hw =
4649                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4650         uint32_t mtqc;
4651         uint32_t rttdcs;
4652
4653         if (hw->mac.type == ixgbe_mac_82598EB)
4654                 return 0;
4655
4656         /* disable arbiter before setting MTQC */
4657         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4658         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4659         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4660
4661         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4662                 /*
4663                  * SRIOV inactive scheme
4664                  * any DCB w/o VMDq multi-queue setting
4665                  */
4666                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4667                         ixgbe_vmdq_tx_hw_configure(hw);
4668                 else {
4669                         mtqc = IXGBE_MTQC_64Q_1PB;
4670                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4671                 }
4672         } else {
4673                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4674
4675                 /*
4676                  * SRIOV active scheme
4677                  * FIXME if support DCB together with VMDq & SRIOV
4678                  */
4679                 case ETH_64_POOLS:
4680                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4681                         break;
4682                 case ETH_32_POOLS:
4683                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4684                         break;
4685                 case ETH_16_POOLS:
4686                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4687                                 IXGBE_MTQC_8TC_8TQ;
4688                         break;
4689                 default:
4690                         mtqc = IXGBE_MTQC_64Q_1PB;
4691                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4692                 }
4693                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4694         }
4695
4696         /* re-enable arbiter */
4697         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4698         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4699
4700         return 0;
4701 }
4702
4703 /**
4704  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4705  *
4706  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4707  * spec rev. 3.0 chapter 8.2.3.8.13.
4708  *
4709  * @pool Memory pool of the Rx queue
4710  */
4711 static inline uint32_t
4712 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4713 {
4714         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4715
4716         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4717         uint16_t maxdesc =
4718                 RTE_IPV4_MAX_PKT_LEN /
4719                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4720
4721         if (maxdesc >= 16)
4722                 return IXGBE_RSCCTL_MAXDESC_16;
4723         else if (maxdesc >= 8)
4724                 return IXGBE_RSCCTL_MAXDESC_8;
4725         else if (maxdesc >= 4)
4726                 return IXGBE_RSCCTL_MAXDESC_4;
4727         else
4728                 return IXGBE_RSCCTL_MAXDESC_1;
4729 }
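 /*
  * Illustrative example for ixgbe_get_rscctl_maxdesc(): with the common
  * 2 KB usable data room (e.g. RTE_MBUF_DEFAULT_BUF_SIZE minus the default
  * 128-byte headroom), maxdesc = 65535 / 2048 = 31, so MAXDESC_16 is
  * selected.
  */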
4730
4731 /**
4732  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4733  * interrupt
4734  *
4735  * (Taken from FreeBSD tree)
4736  * (yes this is all very magic and confusing :)
4737  *
4738  * @dev port handle
4739  * @entry the register array entry
4740  * @vector the MSIX vector for this queue
4741  * @type RX/TX/MISC
4742  */
4743 static void
4744 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4745 {
4746         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4747         u32 ivar, index;
4748
4749         vector |= IXGBE_IVAR_ALLOC_VAL;
4750
4751         switch (hw->mac.type) {
4752
4753         case ixgbe_mac_82598EB:
4754                 if (type == -1)
4755                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4756                 else
4757                         entry += (type * 64);
4758                 index = (entry >> 2) & 0x1F;
4759                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4760                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4761                 ivar |= (vector << (8 * (entry & 0x3)));
4762                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4763                 break;
4764
4765         case ixgbe_mac_82599EB:
4766         case ixgbe_mac_X540:
4767                 if (type == -1) { /* MISC IVAR */
4768                         index = (entry & 1) * 8;
4769                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4770                         ivar &= ~(0xFF << index);
4771                         ivar |= (vector << index);
4772                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4773                 } else {        /* RX/TX IVARS */
4774                         index = (16 * (entry & 1)) + (8 * type);
4775                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4776                         ivar &= ~(0xFF << index);
4777                         ivar |= (vector << index);
4778                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4779                 }
4780
4781                 break;
4782
4783         default:
4784                 break;
4785         }
4786 }
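 /*
  * Worked example for ixgbe_set_ivar() above (illustrative): on 82599,
  * Rx queue 5 (type 0) gives index = 16 * (5 & 1) + 8 * 0 = 16, so the
  * vector is written into bits 23:16 of IVAR(5 >> 1) = IVAR(2).
  */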
4787
4788 void __rte_cold
4789 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4790 {
4791         uint16_t i, rx_using_sse;
4792         struct ixgbe_adapter *adapter = dev->data->dev_private;
4793
4794         /*
4795          * In order to allow Vector Rx there are a few configuration
4796          * conditions to be met and Rx Bulk Allocation should be allowed.
4797          */
4798         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4799             !adapter->rx_bulk_alloc_allowed ||
4800                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4801                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4802                                     "preconditions",
4803                              dev->data->port_id);
4804
4805                 adapter->rx_vec_allowed = false;
4806         }
4807
4808         /*
4809          * Initialize the appropriate LRO callback.
4810          *
4811          * If all queues satisfy the bulk allocation preconditions
4812          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4813          * Otherwise use a single allocation version.
4814          */
4815         if (dev->data->lro) {
4816                 if (adapter->rx_bulk_alloc_allowed) {
4817                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4818                                            "allocation version");
4819                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4820                 } else {
4821                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4822                                            "allocation version");
4823                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4824                 }
4825         } else if (dev->data->scattered_rx) {
4826                 /*
4827                  * Set the non-LRO scattered callback: there are Vector,
4828                  * bulk allocation and single allocation versions.
4829                  */
4830                 if (adapter->rx_vec_allowed) {
4831                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4832                                             "callback (port=%d).",
4833                                      dev->data->port_id);
4834
4835                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4836                 } else if (adapter->rx_bulk_alloc_allowed) {
4837                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4838                                            "allocation callback (port=%d).",
4839                                      dev->data->port_id);
4840                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4841                 } else {
4842                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4843                                             "single allocation) "
4844                                             "Scattered Rx callback "
4845                                             "(port=%d).",
4846                                      dev->data->port_id);
4847
4848                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4849                 }
4850         /*
4851          * Below we set "simple" callbacks according to port/queue parameters.
4852          * If parameters allow we are going to choose between the following
4853          * callbacks:
4854          *    - Vector
4855          *    - Bulk Allocation
4856          *    - Single buffer allocation (the simplest one)
4857          */
4858         } else if (adapter->rx_vec_allowed) {
4859                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure the Rx "
4860                                     "burst size is no less than %d (port=%d).",
4861                              RTE_IXGBE_DESCS_PER_LOOP,
4862                              dev->data->port_id);
4863
4864                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4865         } else if (adapter->rx_bulk_alloc_allowed) {
4866                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4867                                     "satisfied. Rx Burst Bulk Alloc function "
4868                                     "will be used on port=%d.",
4869                              dev->data->port_id);
4870
4871                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4872         } else {
4873                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4874                                     "satisfied, or Scattered Rx is requested "
4875                                     "(port=%d).",
4876                              dev->data->port_id);
4877
4878                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4879         }
4880
4881         /* Propagate information about RX function choice through all queues. */
4882
4883         rx_using_sse =
4884                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4885                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4886
4887         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4888                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4889
4890                 rxq->rx_using_sse = rx_using_sse;
4891 #ifdef RTE_LIB_SECURITY
4892                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4893                                 DEV_RX_OFFLOAD_SECURITY);
4894 #endif
4895         }
4896 }
4897
4898 /**
4899  * ixgbe_set_rsc - configure RSC related port HW registers
4900  *
4901  * Configures the port's RSC related registers according to chapter 4.6.7.2
4902  * of the 82599 Spec (x540 configuration is virtually the same).
4903  *
4904  * @dev port handle
4905  *
4906  * Returns 0 in case of success or a non-zero error code
4907  */
4908 static int
4909 ixgbe_set_rsc(struct rte_eth_dev *dev)
4910 {
4911         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4912         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4913         struct rte_eth_dev_info dev_info = { 0 };
4914         bool rsc_capable = false;
4915         uint16_t i;
4916         uint32_t rdrxctl;
4917         uint32_t rfctl;
4918
4919         /* Sanity check */
4920         dev->dev_ops->dev_infos_get(dev, &dev_info);
4921         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4922                 rsc_capable = true;
4923
4924         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4925                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4926                                    "support it");
4927                 return -EINVAL;
4928         }
4929
4930         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4931
4932         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4933              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4934                 /*
4935                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4936                  * 3.0, RSC configuration requires HW CRC stripping to be
4937                  * enabled. If user requested both HW CRC stripping off
4938                  * and RSC on - return an error.
4939                  */
4940                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4941                                     "is disabled");
4942                 return -EINVAL;
4943         }
4944
4945         /* RFCTL configuration  */
4946         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4947         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4948                 rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4949         else
4950                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4951         /* disable NFS filtering */
4952         rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4953         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4954
4955         /* If LRO hasn't been requested - we are done here. */
4956         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4957                 return 0;
4958
4959         /* Set RDRXCTL.RSCACKC bit */
4960         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4961         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4962         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4963
4964         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4965         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4966                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4967                 uint32_t srrctl =
4968                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4969                 uint32_t rscctl =
4970                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4971                 uint32_t psrtype =
4972                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4973                 uint32_t eitr =
4974                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4975
4976                 /*
4977                  * ixgbe PMD doesn't support header-split at the moment.
4978                  *
4979                  * Following chapter 4.6.7.2.1 of the 82599/x540
4980                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4981                  * field should be configured even if header split is
4982                  * not enabled. We will configure it to 128 bytes,
4983                  * following the recommendation in the spec.
4984                  */
4985                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4986                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4987                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
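                 /*
                  * Assuming the usual SRRCTL layout (BSIZEHEADER in 64-byte
                  * units at bits 13:8, shift of 2), 128 << 2 encodes a
                  * 128-byte header buffer (field value 2).
                  */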
4988
4989                 /*
4990                  * TODO: Consider setting the Receive Descriptor Minimum
4991                  * Threshold Size for an RSC case. This is not an obviously
4992                  * beneficial option, but one worth considering...
4993                  */
4994
4995                 rscctl |= IXGBE_RSCCTL_RSCEN;
4996                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4997                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4998
4999                 /*
5000                  * RSC: Set ITR interval corresponding to 2K ints/s.
5001                  *
5002                  * Full-sized RSC aggregations for a 10Gb/s link will
5003                  * arrive at a rate of about 20K aggregations/s.
5004                  *
5005                  * A 2K ints/s rate will cause only 10% of the
5006                  * aggregations to be closed due to the interrupt timer
5007                  * expiration for a streaming at wire-speed case.
5008                  *
5009                  * For a sparse streaming case this setting will yield
5010                  * at most 500us latency for a single RSC aggregation.
5011                  */
5012                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5013                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5014                 eitr |= IXGBE_EITR_CNT_WDIS;
5015
5016                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5017                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5018                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5019                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5020
5021                 /*
5022                  * RSC requires the mapping of the queue to the
5023                  * interrupt vector.
5024                  */
5025                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5026         }
5027
5028         dev->data->lro = 1;
5029
5030         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5031
5032         return 0;
5033 }
5034
5035 /*
5036  * Initializes Receive Unit.
5037  */
5038 int __rte_cold
5039 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5040 {
5041         struct ixgbe_hw     *hw;
5042         struct ixgbe_rx_queue *rxq;
5043         uint64_t bus_addr;
5044         uint32_t rxctrl;
5045         uint32_t fctrl;
5046         uint32_t hlreg0;
5047         uint32_t maxfrs;
5048         uint32_t srrctl;
5049         uint32_t rdrxctl;
5050         uint32_t rxcsum;
5051         uint16_t buf_size;
5052         uint16_t i;
5053         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5054         int rc;
5055
5056         PMD_INIT_FUNC_TRACE();
5057         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5058
5059         /*
5060          * Make sure receives are disabled while setting
5061          * up the RX context (registers, descriptor rings, etc.).
5062          */
5063         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5064         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5065
5066         /* Enable receipt of broadcast frames */
5067         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5068         fctrl |= IXGBE_FCTRL_BAM;
5069         fctrl |= IXGBE_FCTRL_DPF;
5070         fctrl |= IXGBE_FCTRL_PMCF;
5071         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5072
5073         /*
5074          * Configure CRC stripping, if any.
5075          */
5076         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5077         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5078                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5079         else
5080                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5081
5082         /*
5083          * Configure jumbo frame support, if any.
5084          */
5085         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5086                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5087                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5088                 maxfrs &= 0x0000FFFF;
5089                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5090                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5091         } else
5092                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5093
5094         /*
5095          * If loopback mode is configured, set LPBK bit.
5096          */
5097         if (dev->data->dev_conf.lpbk_mode != 0) {
5098                 rc = ixgbe_check_supported_loopback_mode(dev);
5099                 if (rc < 0) {
5100                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5101                         return rc;
5102                 }
5103                 hlreg0 |= IXGBE_HLREG0_LPBK;
5104         } else {
5105                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5106         }
5107
5108         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5109
5110         /*
5111          * Assume no header split and no VLAN strip support
5112          * on any Rx queue first.
5113          */
5114         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5115         /* Setup RX queues */
5116         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5117                 rxq = dev->data->rx_queues[i];
5118
5119                 /*
5120                  * Reset crc_len in case it was changed after queue setup by a
5121                  * call to configure.
5122                  */
5123                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5124                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5125                 else
5126                         rxq->crc_len = 0;
5127
5128                 /* Setup the Base and Length of the Rx Descriptor Rings */
5129                 bus_addr = rxq->rx_ring_phys_addr;
5130                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5131                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5132                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5133                                 (uint32_t)(bus_addr >> 32));
5134                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5135                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5136                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5137                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
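                     /*
                      * Head and tail are both reset to zero here; the tail is only
                      * advanced to the last descriptor in ixgbe_dev_rx_queue_start(),
                      * once receive buffers have been attached to the ring.
                      */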
5138
5139                 /* Configure the SRRCTL register */
5140                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5141
5142                 /* Drop packets when no Rx descriptors are available, if requested */
5143                 if (rxq->drop_en)
5144                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5145
5146                 /*
5147                  * Configure the RX buffer size in the BSIZEPACKET field of
5148                  * the SRRCTL register of the queue.
5149                  * The value is in 1 KB resolution. Valid values can be from
5150                  * 1 KB to 16 KB.
5151                  */
5152                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5153                         RTE_PKTMBUF_HEADROOM);
5154                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5155                            IXGBE_SRRCTL_BSIZEPKT_MASK);
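                     /*
                      * For example, with the common RTE_MBUF_DEFAULT_BUF_SIZE data
                      * room (2048 bytes of usable data plus 128 bytes of headroom),
                      * buf_size works out to 2048 and BSIZEPACKET is programmed to
                      * 2, i.e. 2 KB hardware receive buffers.
                      */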
5156
5157                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5158
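                     /*
                      * Converting the programmed field back gives the effective
                      * buffer size rounded down to 1 KB granularity; this value
                      * decides below whether scattered Rx is needed.
                      */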
5159                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5160                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5161
5162                 /* Add dual VLAN tag length to support dual VLAN (QinQ) frames */
5163                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5164                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5165                         dev->data->scattered_rx = 1;
5166                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5167                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5168         }
5169
5170         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5171                 dev->data->scattered_rx = 1;
5172
5173         /*
5174          * Device configured with multiple RX queues.
5175          */
5176         ixgbe_dev_mq_rx_configure(dev);
5177
5178         /*
5179          * Setup the Checksum Register.
5180          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5181          * Enable IP/L4 checksum computation by hardware if requested.
5182          */
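             /*
              * The fragment checksum and the RSS hash value share the same field
              * in the Rx descriptor; setting PCSD (packet checksum disable) makes
              * the hardware report the RSS hash there instead.
              */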
5183         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5184         rxcsum |= IXGBE_RXCSUM_PCSD;
5185         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5186                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5187         else
5188                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5189
5190         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5191
5192         if (hw->mac.type == ixgbe_mac_82599EB ||
5193             hw->mac.type == ixgbe_mac_X540) {
5194                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5195                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5196                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5197                 else
5198                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5199                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5200                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5201         }
5202
5203         rc = ixgbe_set_rsc(dev);
5204         if (rc)
5205                 return rc;
5206
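             /*
              * ixgbe_set_rx_function() selects the Rx burst handler (vector,
              * bulk-allocation, scattered or LRO variants) matching the offload
              * and scatter configuration established above.
              */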
5207         ixgbe_set_rx_function(dev);
5208
5209         return 0;
5210 }
5211
5212 /*
5213  * Initializes Transmit Unit.
5214  */
5215 void __rte_cold
5216 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5217 {
5218         struct ixgbe_hw     *hw;
5219         struct ixgbe_tx_queue *txq;
5220         uint64_t bus_addr;
5221         uint32_t hlreg0;
5222         uint32_t txctrl;
5223         uint16_t i;
5224
5225         PMD_INIT_FUNC_TRACE();
5226         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5227
5228         /* Enable TX CRC (checksum offload requirement) and hw padding
5229          * (TSO requirement)
5230          */
5231         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5232         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5233         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5234
5235         /* Setup the Base and Length of the Tx Descriptor Rings */
5236         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5237                 txq = dev->data->tx_queues[i];
5238
5239                 bus_addr = txq->tx_ring_phys_addr;
5240                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5241                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5242                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5243                                 (uint32_t)(bus_addr >> 32));
5244                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5245                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5246                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5247                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5248                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5249
5250                 /*
5251                  * Disable the Tx head write-back relaxed-ordering (RO) bit, since
5252                  * it breaks bookkeeping when write-backs arrive out of order.
5253                  */
5254                 switch (hw->mac.type) {
5255                 case ixgbe_mac_82598EB:
5256                         txctrl = IXGBE_READ_REG(hw,
5257                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5258                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5259                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5260                                         txctrl);
5261                         break;
5262
5263                 case ixgbe_mac_82599EB:
5264                 case ixgbe_mac_X540:
5265                 case ixgbe_mac_X550:
5266                 case ixgbe_mac_X550EM_x:
5267                 case ixgbe_mac_X550EM_a:
5268                 default:
5269                         txctrl = IXGBE_READ_REG(hw,
5270                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5271                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5272                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5273                                         txctrl);
5274                         break;
5275                 }
5276         }
5277
5278         /* Device configured with multiple TX queues. */
5279         ixgbe_dev_mq_tx_configure(dev);
5280 }
5281
5282 /*
5283  * Check if requested loopback mode is supported
5284  */
5285 int
5286 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5287 {
5288         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5289
5290         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5291                 if (hw->mac.type == ixgbe_mac_82599EB ||
5292                      hw->mac.type == ixgbe_mac_X540 ||
5293                      hw->mac.type == ixgbe_mac_X550 ||
5294                      hw->mac.type == ixgbe_mac_X550EM_x ||
5295                      hw->mac.type == ixgbe_mac_X550EM_a)
5296                         return 0;
5297
5298         return -ENOTSUP;
5299 }
5300
5301 /*
5302  * Set up link for 82599 loopback mode Tx->Rx.
5303  */
5304 static inline void __rte_cold
5305 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5306 {
5307         PMD_INIT_FUNC_TRACE();
5308
5309         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5310                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5311                                 IXGBE_SUCCESS) {
5312                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5313                         /* ignore error */
5314                         return;
5315                 }
5316         }
5317
5318         /* Restart link */
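             /*
              * Force link up (FLU) with the link mode fixed at 10G without
              * auto-negotiation, so that transmitted frames are looped straight
              * back to the receive path.
              */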
5319         IXGBE_WRITE_REG(hw,
5320                         IXGBE_AUTOC,
5321                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5322         ixgbe_reset_pipeline_82599(hw);
5323
5324         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5325         msec_delay(50);
5326 }
5327
5328
5329 /*
5330  * Start Transmit and Receive Units.
5331  */
5332 int __rte_cold
5333 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5334 {
5335         struct ixgbe_hw     *hw;
5336         struct ixgbe_tx_queue *txq;
5337         struct ixgbe_rx_queue *rxq;
5338         uint32_t txdctl;
5339         uint32_t dmatxctl;
5340         uint32_t rxctrl;
5341         uint16_t i;
5342         int ret = 0;
5343
5344         PMD_INIT_FUNC_TRACE();
5345         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5346
5347         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5348                 txq = dev->data->tx_queues[i];
5349                 /* Setup Transmit Threshold Registers */
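                     /*
                      * In TXDCTL the prefetch threshold (PTHRESH) occupies bits 6:0,
                      * the host threshold (HTHRESH) bits 14:8 and the write-back
                      * threshold (WTHRESH) bits 22:16, hence the shifts below.
                      */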
5350                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5351                 txdctl |= txq->pthresh & 0x7F;
5352                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5353                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5354                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5355         }
5356
5357         if (hw->mac.type != ixgbe_mac_82598EB) {
5358                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5359                 dmatxctl |= IXGBE_DMATXCTL_TE;
5360                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5361         }
5362
5363         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5364                 txq = dev->data->tx_queues[i];
5365                 if (!txq->tx_deferred_start) {
5366                         ret = ixgbe_dev_tx_queue_start(dev, i);
5367                         if (ret < 0)
5368                                 return ret;
5369                 }
5370         }
5371
5372         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5373                 rxq = dev->data->rx_queues[i];
5374                 if (!rxq->rx_deferred_start) {
5375                         ret = ixgbe_dev_rx_queue_start(dev, i);
5376                         if (ret < 0)
5377                                 return ret;
5378                 }
5379         }
5380
5381         /* Enable Receive engine */
5382         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5383         if (hw->mac.type == ixgbe_mac_82598EB)
5384                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5385         rxctrl |= IXGBE_RXCTRL_RXEN;
5386         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5387
5388         /* If loopback mode is enabled, set up the link accordingly */
5389         if (dev->data->dev_conf.lpbk_mode != 0) {
5390                 if (hw->mac.type == ixgbe_mac_82599EB)
5391                         ixgbe_setup_loopback_link_82599(hw);
5392                 else if (hw->mac.type == ixgbe_mac_X540 ||
5393                      hw->mac.type == ixgbe_mac_X550 ||
5394                      hw->mac.type == ixgbe_mac_X550EM_x ||
5395                      hw->mac.type == ixgbe_mac_X550EM_a)
5396                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5397         }
5398
5399 #ifdef RTE_LIB_SECURITY
5400         if ((dev->data->dev_conf.rxmode.offloads &
5401                         DEV_RX_OFFLOAD_SECURITY) ||
5402                 (dev->data->dev_conf.txmode.offloads &
5403                         DEV_TX_OFFLOAD_SECURITY)) {
5404                 ret = ixgbe_crypto_enable_ipsec(dev);
5405                 if (ret != 0) {
5406                         PMD_DRV_LOG(ERR,
5407                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5408                                     ret);
5409                         return ret;
5410                 }
5411         }
5412 #endif
5413
5414         return 0;
5415 }
5416
5417 /*
5418  * Start Receive Units for specified queue.
5419  */
5420 int __rte_cold
5421 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5422 {
5423         struct ixgbe_hw     *hw;
5424         struct ixgbe_rx_queue *rxq;
5425         uint32_t rxdctl;
5426         int poll_ms;
5427
5428         PMD_INIT_FUNC_TRACE();
5429         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5430
5431         rxq = dev->data->rx_queues[rx_queue_id];
5432
5433         /* Allocate buffers for descriptor rings */
5434         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5435                 PMD_INIT_LOG(ERR, "Could not allocate mbufs for queue %d",
5436                              rx_queue_id);
5437                 return -1;
5438         }
5439         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5440         rxdctl |= IXGBE_RXDCTL_ENABLE;
5441         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5442
5443         /* Wait until RX Enable ready */
5444         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5445         do {
5446                 rte_delay_ms(1);
5447                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5448         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5449         if (!poll_ms)
5450                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
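             /*
              * Make the descriptor writes globally visible before the tail update;
              * the tail is programmed to the last descriptor so that one slot stays
              * unused and head == tail always means an empty ring.
              */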
5451         rte_wmb();
5452         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5453         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5454         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5455
5456         return 0;
5457 }
5458
5459 /*
5460  * Stop Receive Units for specified queue.
5461  */
5462 int __rte_cold
5463 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5464 {
5465         struct ixgbe_hw     *hw;
5466         struct ixgbe_adapter *adapter = dev->data->dev_private;
5467         struct ixgbe_rx_queue *rxq;
5468         uint32_t rxdctl;
5469         int poll_ms;
5470
5471         PMD_INIT_FUNC_TRACE();
5472         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5473
5474         rxq = dev->data->rx_queues[rx_queue_id];
5475
5476         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5477         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5478         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5479
5480         /* Wait until RX Enable bit clear */
5481         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5482         do {
5483                 rte_delay_ms(1);
5484                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5485         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5486         if (!poll_ms)
5487                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5488
5489         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5490
5491         ixgbe_rx_queue_release_mbufs(rxq);
5492         ixgbe_reset_rx_queue(adapter, rxq);
5493         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5494
5495         return 0;
5496 }
5497
5498
5499 /*
5500  * Start Transmit Units for specified queue.
5501  */
5502 int __rte_cold
5503 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5504 {
5505         struct ixgbe_hw     *hw;
5506         struct ixgbe_tx_queue *txq;
5507         uint32_t txdctl;
5508         int poll_ms;
5509
5510         PMD_INIT_FUNC_TRACE();
5511         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5512
5513         txq = dev->data->tx_queues[tx_queue_id];
5514         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5515         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5516         txdctl |= IXGBE_TXDCTL_ENABLE;
5517         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5518
5519         /* Wait until TX Enable ready */
5520         if (hw->mac.type == ixgbe_mac_82599EB) {
5521                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5522                 do {
5523                         rte_delay_ms(1);
5524                         txdctl = IXGBE_READ_REG(hw,
5525                                 IXGBE_TXDCTL(txq->reg_idx));
5526                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5527                 if (!poll_ms)
5528                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5529                                 tx_queue_id);
5530         }
5531         rte_wmb();
5532         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5533         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5534
5535         return 0;
5536 }
5537
5538 /*
5539  * Stop Transmit Units for specified queue.
5540  */
5541 int __rte_cold
5542 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5543 {
5544         struct ixgbe_hw     *hw;
5545         struct ixgbe_tx_queue *txq;
5546         uint32_t txdctl;
5547         uint32_t txtdh, txtdt;
5548         int poll_ms;
5549
5550         PMD_INIT_FUNC_TRACE();
5551         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5552
5553         txq = dev->data->tx_queues[tx_queue_id];
5554
5555         /* Wait until TX queue is empty */
5556         if (hw->mac.type == ixgbe_mac_82599EB) {
5557                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5558                 do {
5559                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5560                         txtdh = IXGBE_READ_REG(hw,
5561                                                IXGBE_TDH(txq->reg_idx));
5562                         txtdt = IXGBE_READ_REG(hw,
5563                                                IXGBE_TDT(txq->reg_idx));
5564                 } while (--poll_ms && (txtdh != txtdt));
5565                 if (!poll_ms)
5566                         PMD_INIT_LOG(ERR,
5567                                 "Tx Queue %d is not empty when stopping.",
5568                                 tx_queue_id);
5569         }
5570
5571         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5572         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5573         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5574
5575         /* Wait until TX Enable bit clear */
5576         if (hw->mac.type == ixgbe_mac_82599EB) {
5577                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5578                 do {
5579                         rte_delay_ms(1);
5580                         txdctl = IXGBE_READ_REG(hw,
5581                                                 IXGBE_TXDCTL(txq->reg_idx));
5582                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5583                 if (!poll_ms)
5584                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5585                                 tx_queue_id);
5586         }
5587
5588         if (txq->ops != NULL) {
5589                 txq->ops->release_mbufs(txq);
5590                 txq->ops->reset(txq);
5591         }
5592         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5593
5594         return 0;
5595 }
5596
5597 void
5598 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5599         struct rte_eth_rxq_info *qinfo)
5600 {
5601         struct ixgbe_rx_queue *rxq;
5602
5603         rxq = dev->data->rx_queues[queue_id];
5604
5605         qinfo->mp = rxq->mb_pool;
5606         qinfo->scattered_rx = dev->data->scattered_rx;
5607         qinfo->nb_desc = rxq->nb_rx_desc;
5608
5609         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5610         qinfo->conf.rx_drop_en = rxq->drop_en;
5611         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5612         qinfo->conf.offloads = rxq->offloads;
5613 }
5614
5615 void
5616 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5617         struct rte_eth_txq_info *qinfo)
5618 {
5619         struct ixgbe_tx_queue *txq;
5620
5621         txq = dev->data->tx_queues[queue_id];
5622
5623         qinfo->nb_desc = txq->nb_tx_desc;
5624
5625         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5626         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5627         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5628
5629         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5630         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5631         qinfo->conf.offloads = txq->offloads;
5632         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5633 }
5634
5635 /*
5636  * [VF] Initializes Receive Unit.
5637  */
5638 int __rte_cold
5639 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5640 {
5641         struct ixgbe_hw     *hw;
5642         struct ixgbe_rx_queue *rxq;
5643         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5644         uint64_t bus_addr;
5645         uint32_t srrctl, psrtype = 0;
5646         uint16_t buf_size;
5647         uint16_t i;
5648         int ret;
5649
5650         PMD_INIT_FUNC_TRACE();
5651         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5652
5653         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5654                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5655                         "it must be a power of 2");
5656                 return -1;
5657         }
5658
5659         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5660                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5661                         "it must be less than or equal to %d",
5662                         hw->mac.max_rx_queues);
5663                 return -1;
5664         }
5665
5666         /*
5667          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5668          * disables VF packet reception if the PF MTU is > 1500.
5669          * This is done to deal with an 82599 limitation that forces
5670          * the PF and all VFs to share the same MTU.
5671          * The PF driver re-enables VF packet reception only when the VF driver
5672          * issues an IXGBE_VF_SET_LPE request.
5673          * In the meantime, the VF device cannot be used, even if the VF driver
5674          * and the guest VM network stack are ready to accept packets with a
5675          * size up to the PF MTU.
5676          * As a workaround to this PF behaviour, always call
5677          * ixgbevf_rlpml_set_vf, even if jumbo frames are not used. This way,
5678          * VF packet reception works in all cases.
5679          */
5680         if (ixgbevf_rlpml_set_vf(hw,
5681             (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) {
5682                 PMD_INIT_LOG(ERR, "Set max packet length to %d failed.",
5683                              dev->data->dev_conf.rxmode.max_rx_pkt_len);
5684                 return -EINVAL;
5685         }
5686
5687         /*
5688          * Assume no header split and no VLAN strip support
5689          * on any Rx queue first.
5690          */
5691         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5692         /* Setup RX queues */
5693         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5694                 rxq = dev->data->rx_queues[i];
5695
5696                 /* Allocate buffers for descriptor rings */
5697                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5698                 if (ret)
5699                         return ret;
5700
5701                 /* Setup the Base and Length of the Rx Descriptor Rings */
5702                 bus_addr = rxq->rx_ring_phys_addr;
5703
5704                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5705                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5706                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5707                                 (uint32_t)(bus_addr >> 32));
5708                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5709                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5710                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5711                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5712
5713
5714                 /* Configure the SRRCTL register */
5715                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5716
5717                 /* Drop packets when no Rx descriptors are available, if requested */
5718                 if (rxq->drop_en)
5719                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5720
5721                 /*
5722                  * Configure the RX buffer size in the BSIZEPACKET field of
5723                  * the SRRCTL register of the queue.
5724                  * The value is in 1 KB resolution. Valid values can be from
5725                  * 1 KB to 16 KB.
5726                  */
5727                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5728                         RTE_PKTMBUF_HEADROOM);
5729                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5730                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5731
5732                 /*
5733                  * Write the SRRCTL of this queue through the VF register set (VFSRRCTL).
5734                  */
5735                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5736
5737                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5738                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
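                     /*
                      * As in the PF path, the value read back is the buffer size
                      * rounded down to the 1 KB granularity actually programmed,
                      * and it drives the scattered-Rx decision below.
                      */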
5739
5740                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5741                     /* It adds dual VLAN length for supporting dual VLAN */
5742                     (rxmode->max_rx_pkt_len +
5743                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5744                         if (!dev->data->scattered_rx)
5745                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5746                         dev->data->scattered_rx = 1;
5747                 }
5748
5749                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5750                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5751         }
5752
5753         /* Set RQPL for VF RSS according to the number of Rx queues */
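             /*
              * RQPL describes how many RSS queues the pool uses (0, 1 and 2 select
              * 1, 2 and 4 queues respectively); because nb_rx_queues was validated
              * above to be a power of two no larger than the VF maximum,
              * nb_rx_queues >> 1 yields the expected encoding.
              */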
5754         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5755                 IXGBE_PSRTYPE_RQPL_SHIFT;
5756         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5757
5758         ixgbe_set_rx_function(dev);
5759
5760         return 0;
5761 }
5762
5763 /*
5764  * [VF] Initializes Transmit Unit.
5765  */
5766 void __rte_cold
5767 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5768 {
5769         struct ixgbe_hw     *hw;
5770         struct ixgbe_tx_queue *txq;
5771         uint64_t bus_addr;
5772         uint32_t txctrl;
5773         uint16_t i;
5774
5775         PMD_INIT_FUNC_TRACE();
5776         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5777
5778         /* Setup the Base and Length of the Tx Descriptor Rings */
5779         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5780                 txq = dev->data->tx_queues[i];
5781                 bus_addr = txq->tx_ring_phys_addr;
5782                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5783                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5784                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5785                                 (uint32_t)(bus_addr >> 32));
5786                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5787                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5788                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5789                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5790                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5791
5792                 /*
5793                  * Disable the Tx head write-back relaxed-ordering (RO) bit, since
5794                  * it breaks bookkeeping when write-backs arrive out of order.
5795                  */
5796                 txctrl = IXGBE_READ_REG(hw,
5797                                 IXGBE_VFDCA_TXCTRL(i));
5798                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5799                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5800                                 txctrl);
5801         }
5802 }
5803
5804 /*
5805  * [VF] Start Transmit and Receive Units.
5806  */
5807 void __rte_cold
5808 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5809 {
5810         struct ixgbe_hw     *hw;
5811         struct ixgbe_tx_queue *txq;
5812         struct ixgbe_rx_queue *rxq;
5813         uint32_t txdctl;
5814         uint32_t rxdctl;
5815         uint16_t i;
5816         int poll_ms;
5817
5818         PMD_INIT_FUNC_TRACE();
5819         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5820
5821         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5822                 txq = dev->data->tx_queues[i];
5823                 /* Setup Transmit Threshold Registers */
5824                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5825                 txdctl |= txq->pthresh & 0x7F;
5826                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5827                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5828                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5829         }
5830
5831         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5832
5833                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5834                 txdctl |= IXGBE_TXDCTL_ENABLE;
5835                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5836
5837                 poll_ms = 10;
5838                 /* Wait until TX Enable ready */
5839                 do {
5840                         rte_delay_ms(1);
5841                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5842                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5843                 if (!poll_ms)
5844                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5845         }
5846         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5847
5848                 rxq = dev->data->rx_queues[i];
5849
5850                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5851                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5852                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5853
5854                 /* Wait until RX Enable ready */
5855                 poll_ms = 10;
5856                 do {
5857                         rte_delay_ms(1);
5858                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5859                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5860                 if (!poll_ms)
5861                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5862                 rte_wmb();
5863                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5864
5865         }
5866 }
5867
5868 int
5869 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5870                     const struct rte_flow_action_rss *in)
5871 {
5872         if (in->key_len > RTE_DIM(out->key) ||
5873             in->queue_num > RTE_DIM(out->queue))
5874                 return -EINVAL;
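             /*
              * Deep-copy the key and queue list into the driver-owned arrays so the
              * stored RSS configuration does not dangle on caller-owned memory.
              */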
5875         out->conf = (struct rte_flow_action_rss){
5876                 .func = in->func,
5877                 .level = in->level,
5878                 .types = in->types,
5879                 .key_len = in->key_len,
5880                 .queue_num = in->queue_num,
5881                 .key = memcpy(out->key, in->key, in->key_len),
5882                 .queue = memcpy(out->queue, in->queue,
5883                                 sizeof(*in->queue) * in->queue_num),
5884         };
5885         return 0;
5886 }
5887
5888 int
5889 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5890                       const struct rte_flow_action_rss *with)
5891 {
5892         return (comp->func == with->func &&
5893                 comp->level == with->level &&
5894                 comp->types == with->types &&
5895                 comp->key_len == with->key_len &&
5896                 comp->queue_num == with->queue_num &&
5897                 !memcmp(comp->key, with->key, with->key_len) &&
5898                 !memcmp(comp->queue, with->queue,
5899                         sizeof(*with->queue) * with->queue_num));
5900 }
5901
5902 int
5903 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5904                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5905 {
5906         struct ixgbe_hw *hw;
5907         uint32_t reta;
5908         uint16_t i;
5909         uint16_t j;
5910         uint16_t sp_reta_size;
5911         uint32_t reta_reg;
5912         struct rte_eth_rss_conf rss_conf = {
5913                 .rss_key = conf->conf.key_len ?
5914                         (void *)(uintptr_t)conf->conf.key : NULL,
5915                 .rss_key_len = conf->conf.key_len,
5916                 .rss_hf = conf->conf.types,
5917         };
5918         struct ixgbe_filter_info *filter_info =
5919                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5920
5921         PMD_INIT_FUNC_TRACE();
5922         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5923
5924         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5925
5926         if (!add) {
5927                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5928                                           &conf->conf)) {
5929                         ixgbe_rss_disable(dev);
5930                         memset(&filter_info->rss_info, 0,
5931                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5932                         return 0;
5933                 }
5934                 return -EINVAL;
5935         }
5936
5937         if (filter_info->rss_info.conf.queue_num)
5938                 return -EINVAL;
5939         /* Fill in the redirection table.
5940          * The byte-swap is needed because NIC registers are in
5941          * little-endian order.
5942          */
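             /*
              * Each 32-bit RETA register packs four 8-bit queue indices; entries are
              * assigned round-robin over conf->conf.queue[] and the accumulated word
              * is flushed to hardware on every fourth iteration.
              */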
5943         reta = 0;
5944         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5945                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5946
5947                 if (j == conf->conf.queue_num)
5948                         j = 0;
5949                 reta = (reta << 8) | conf->conf.queue[j];
5950                 if ((i & 3) == 3)
5951                         IXGBE_WRITE_REG(hw, reta_reg,
5952                                         rte_bswap32(reta));
5953         }
5954
5955         /* Configure the RSS key and the RSS protocols used to compute
5956          * the RSS hash of input packets.
5957          */
5958         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5959                 ixgbe_rss_disable(dev);
5960                 return 0;
5961         }
5962         if (rss_conf.rss_key == NULL)
5963                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5964         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5965
5966         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5967                 return -EINVAL;
5968
5969         return 0;
5970 }
5971
5972 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5973 #if defined(RTE_ARCH_PPC_64)
5974 int
5975 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5976 {
5977         return -1;
5978 }
5979
5980 uint16_t
5981 ixgbe_recv_pkts_vec(
5982         void __rte_unused *rx_queue,
5983         struct rte_mbuf __rte_unused **rx_pkts,
5984         uint16_t __rte_unused nb_pkts)
5985 {
5986         return 0;
5987 }
5988
5989 uint16_t
5990 ixgbe_recv_scattered_pkts_vec(
5991         void __rte_unused *rx_queue,
5992         struct rte_mbuf __rte_unused **rx_pkts,
5993         uint16_t __rte_unused nb_pkts)
5994 {
5995         return 0;
5996 }
5997
5998 int
5999 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
6000 {
6001         return -1;
6002 }
6003
6004 uint16_t
6005 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
6006                 struct rte_mbuf __rte_unused **tx_pkts,
6007                 uint16_t __rte_unused nb_pkts)
6008 {
6009         return 0;
6010 }
6011
6012 int
6013 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
6014 {
6015         return -1;
6016 }
6017
6018 void
6019 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6020 {
6021         return;
6022 }
6023 #endif