security: switch metadata to dynamic mbuf field
[dpdk.git] / drivers/net/ixgbe/ixgbe_rxtx.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2016 Intel Corporation.
3  * Copyright 2014 6WIND S.A.
4  */
5
6 #include <sys/queue.h>
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdarg.h>
14 #include <unistd.h>
15 #include <inttypes.h>
16
17 #include <rte_byteorder.h>
18 #include <rte_common.h>
19 #include <rte_cycles.h>
20 #include <rte_log.h>
21 #include <rte_debug.h>
22 #include <rte_interrupts.h>
23 #include <rte_pci.h>
24 #include <rte_memory.h>
25 #include <rte_memzone.h>
26 #include <rte_launch.h>
27 #include <rte_eal.h>
28 #include <rte_per_lcore.h>
29 #include <rte_lcore.h>
30 #include <rte_atomic.h>
31 #include <rte_branch_prediction.h>
32 #include <rte_mempool.h>
33 #include <rte_malloc.h>
34 #include <rte_mbuf.h>
35 #include <rte_ether.h>
36 #include <rte_ethdev_driver.h>
37 #include <rte_security_driver.h>
38 #include <rte_prefetch.h>
39 #include <rte_udp.h>
40 #include <rte_tcp.h>
41 #include <rte_sctp.h>
42 #include <rte_string_fns.h>
43 #include <rte_errno.h>
44 #include <rte_ip.h>
45 #include <rte_net.h>
46 #include <rte_vect.h>
47
48 #include "ixgbe_logs.h"
49 #include "base/ixgbe_api.h"
50 #include "base/ixgbe_vf.h"
51 #include "ixgbe_ethdev.h"
52 #include "base/ixgbe_dcb.h"
53 #include "base/ixgbe_common.h"
54 #include "ixgbe_rxtx.h"
55
56 #ifdef RTE_LIBRTE_IEEE1588
57 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
58 #else
59 #define IXGBE_TX_IEEE1588_TMST 0
60 #endif
61 /* Bit mask to indicate which bits are required for building the TX context */
62 #define IXGBE_TX_OFFLOAD_MASK (                  \
63                 PKT_TX_OUTER_IPV6 |              \
64                 PKT_TX_OUTER_IPV4 |              \
65                 PKT_TX_IPV6 |                    \
66                 PKT_TX_IPV4 |                    \
67                 PKT_TX_VLAN_PKT |                \
68                 PKT_TX_IP_CKSUM |                \
69                 PKT_TX_L4_MASK |                 \
70                 PKT_TX_TCP_SEG |                 \
71                 PKT_TX_MACSEC |                  \
72                 PKT_TX_OUTER_IP_CKSUM |          \
73                 PKT_TX_SEC_OFFLOAD |             \
74                 IXGBE_TX_IEEE1588_TMST)
75
76 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
77                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
78
79 #if 1
80 #define RTE_PMD_USE_PREFETCH
81 #endif
82
83 #ifdef RTE_PMD_USE_PREFETCH
84 /*
85  * Prefetch a cache line into all cache levels.
86  */
87 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
88 #else
89 #define rte_ixgbe_prefetch(p)   do {} while (0)
90 #endif
91
92 /*********************************************************************
93  *
94  *  TX functions
95  *
96  **********************************************************************/
97
98 /*
99  * Check for descriptors with their DD bit set and free mbufs.
100  * Return the total number of buffers freed.
101  */
102 static __rte_always_inline int
103 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
104 {
105         struct ixgbe_tx_entry *txep;
106         uint32_t status;
107         int i, nb_free = 0;
108         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
109
110         /* check DD bit on threshold descriptor */
111         status = txq->tx_ring[txq->tx_next_dd].wb.status;
112         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
113                 return 0;
114
115         /*
116          * first buffer to free from S/W ring is at index
117          * tx_next_dd - (tx_rs_thresh-1)
118          */
119         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
120
121         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
122                 /* free buffers one at a time */
123                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
124                 txep->mbuf = NULL;
125
126                 if (unlikely(m == NULL))
127                         continue;
128
129                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
130                     (nb_free > 0 && m->pool != free[0]->pool)) {
131                         rte_mempool_put_bulk(free[0]->pool,
132                                              (void **)free, nb_free);
133                         nb_free = 0;
134                 }
135
136                 free[nb_free++] = m;
137         }
138
139         if (nb_free > 0)
140                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
141
142         /* buffers were freed, update counters */
143         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
144         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
145         if (txq->tx_next_dd >= txq->nb_tx_desc)
146                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
147
148         return txq->tx_rs_thresh;
149 }
150
151 /* Populate 4 descriptors with data from 4 mbufs */
152 static inline void
153 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
154 {
155         uint64_t buf_dma_addr;
156         uint32_t pkt_len;
157         int i;
158
159         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
160                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
161                 pkt_len = (*pkts)->data_len;
162
163                 /* write data to descriptor */
164                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
165
166                 txdp->read.cmd_type_len =
167                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
168
169                 txdp->read.olinfo_status =
170                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
171
172                 rte_prefetch0(&(*pkts)->pool);
173         }
174 }
175
176 /* Populate 1 descriptor with data from 1 mbuf */
177 static inline void
178 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
179 {
180         uint64_t buf_dma_addr;
181         uint32_t pkt_len;
182
183         buf_dma_addr = rte_mbuf_data_iova(*pkts);
184         pkt_len = (*pkts)->data_len;
185
186         /* write data to descriptor */
187         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
188         txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190         txdp->read.olinfo_status =
191                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
192         rte_prefetch0(&(*pkts)->pool);
193 }
194
195 /*
196  * Fill H/W descriptor ring with mbuf data.
197  * Copy mbuf pointers to the S/W ring.
198  */
199 static inline void
200 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
201                       uint16_t nb_pkts)
202 {
203         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
204         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
205         const int N_PER_LOOP = 4;
206         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
207         int mainpart, leftover;
208         int i, j;
209
210         /*
211          * Process most of the packets in chunks of N pkts.  Any
212          * leftover packets will get processed one at a time.
213          */
214         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
215         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
216         for (i = 0; i < mainpart; i += N_PER_LOOP) {
217                 /* Copy N mbuf pointers to the S/W ring */
218                 for (j = 0; j < N_PER_LOOP; ++j) {
219                         (txep + i + j)->mbuf = *(pkts + i + j);
220                 }
221                 tx4(txdp + i, pkts + i);
222         }
223
224         if (unlikely(leftover > 0)) {
225                 for (i = 0; i < leftover; ++i) {
226                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
227                         tx1(txdp + mainpart + i, pkts + mainpart + i);
228                 }
229         }
230 }
231
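/*
 * Core of the "simple" TX path: transmit up to nb_pkts single-segment
 * packets that require no offloads. Completed descriptors are freed once
 * the free count drops below tx_free_thresh, the ring is filled via
 * ixgbe_tx_fill_hw_ring(), the RS bit is set on threshold descriptors and
 * the tail register is updated.
 */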
232 static inline uint16_t
233 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
234              uint16_t nb_pkts)
235 {
236         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
237         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
238         uint16_t n = 0;
239
240         /*
241          * Begin scanning the H/W ring for done descriptors when the
242          * number of available descriptors drops below tx_free_thresh.  For
243          * each done descriptor, free the associated buffer.
244          */
245         if (txq->nb_tx_free < txq->tx_free_thresh)
246                 ixgbe_tx_free_bufs(txq);
247
248         /* Only use descriptors that are available */
249         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
250         if (unlikely(nb_pkts == 0))
251                 return 0;
252
253         /* Use exactly nb_pkts descriptors */
254         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
255
256         /*
257          * At this point, we know there are enough descriptors in the
258          * ring to transmit all the packets.  This assumes that each
259          * mbuf contains a single segment, and that no new offloads
260          * are expected, which would require a new context descriptor.
261          */
262
263         /*
264          * See if we're going to wrap-around. If so, handle the top
265          * of the descriptor ring first, then do the bottom.  If not,
266          * the processing looks just like the "bottom" part anyway...
267          */
268         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
269                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
270                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
271
272                 /*
273                  * We know that the last descriptor in the ring will need to
274                  * have its RS bit set because tx_rs_thresh has to be
275                  * a divisor of the ring size
276                  */
277                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
278                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
279                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
280
281                 txq->tx_tail = 0;
282         }
283
284         /* Fill H/W descriptor ring with mbuf data */
285         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
286         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
287
288         /*
289          * Determine if RS bit should be set
290          * This is what we actually want:
291          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
292          * but instead of subtracting 1 and doing >=, we can just do
293          * greater than without subtracting.
294          */
295         if (txq->tx_tail > txq->tx_next_rs) {
296                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
297                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
298                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
299                                                 txq->tx_rs_thresh);
300                 if (txq->tx_next_rs >= txq->nb_tx_desc)
301                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302         }
303
304         /*
305          * Check for wrap-around. This would only happen if we used
306          * up to the last descriptor in the ring, no more, no less.
307          */
308         if (txq->tx_tail >= txq->nb_tx_desc)
309                 txq->tx_tail = 0;
310
311         /* update tail pointer */
312         rte_wmb();
313         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
314
315         return nb_pkts;
316 }
317
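/*
 * Simple TX burst entry point: forwards to tx_xmit_pkts(), splitting
 * requests larger than RTE_PMD_IXGBE_TX_MAX_BURST into smaller chunks.
 */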
318 uint16_t
319 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
320                        uint16_t nb_pkts)
321 {
322         uint16_t nb_tx;
323
324         /* If the whole burst fits within TX_MAX_BURST pkts, transmit it directly */
325         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
326                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
327
328         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
329         nb_tx = 0;
330         while (nb_pkts) {
331                 uint16_t ret, n;
332
333                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
334                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
335                 nb_tx = (uint16_t)(nb_tx + ret);
336                 nb_pkts = (uint16_t)(nb_pkts - ret);
337                 if (ret < n)
338                         break;
339         }
340
341         return nb_tx;
342 }
343
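/*
 * Vector TX burst entry point: forwards to ixgbe_xmit_fixed_burst_vec()
 * in chunks of at most tx_rs_thresh packets.
 */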
344 static uint16_t
345 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
346                     uint16_t nb_pkts)
347 {
348         uint16_t nb_tx = 0;
349         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
350
351         while (nb_pkts) {
352                 uint16_t ret, num;
353
354                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
355                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
356                                                  num);
357                 nb_tx += ret;
358                 nb_pkts -= ret;
359                 if (ret < num)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
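/*
 * Build an advanced TX context descriptor for the requested offloads
 * (VLAN insertion, L3/L4 checksum, TSO, outer IP checksum and, when
 * RTE_LIB_SECURITY is enabled, inline IPsec) and cache the values so that
 * following packets with the same offload requirements can reuse it.
 */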
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
370                 __rte_unused uint64_t *mdata)
371 {
372         uint32_t type_tucmd_mlhl;
373         uint32_t mss_l4len_idx = 0;
374         uint32_t ctx_idx;
375         uint32_t vlan_macip_lens;
376         union ixgbe_tx_offload tx_offload_mask;
377         uint32_t seqnum_seed = 0;
378
379         ctx_idx = txq->ctx_curr;
380         tx_offload_mask.data[0] = 0;
381         tx_offload_mask.data[1] = 0;
382         type_tucmd_mlhl = 0;
383
384         /* Specify which HW CTX to upload. */
385         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
386
387         if (ol_flags & PKT_TX_VLAN_PKT) {
388                 tx_offload_mask.vlan_tci |= ~0;
389         }
390
391         /* check if TCP segmentation is required for this packet */
392         if (ol_flags & PKT_TX_TCP_SEG) {
393                 /* implies IP cksum in IPv4 */
394                 if (ol_flags & PKT_TX_IP_CKSUM)
395                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
396                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
397                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
398                 else
399                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
400                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
401                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
402
403                 tx_offload_mask.l2_len |= ~0;
404                 tx_offload_mask.l3_len |= ~0;
405                 tx_offload_mask.l4_len |= ~0;
406                 tx_offload_mask.tso_segsz |= ~0;
407                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
408                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
409         } else { /* no TSO, check if hardware checksum is needed */
410                 if (ol_flags & PKT_TX_IP_CKSUM) {
411                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
412                         tx_offload_mask.l2_len |= ~0;
413                         tx_offload_mask.l3_len |= ~0;
414                 }
415
416                 switch (ol_flags & PKT_TX_L4_MASK) {
417                 case PKT_TX_UDP_CKSUM:
418                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
419                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
420                         mss_l4len_idx |= sizeof(struct rte_udp_hdr)
421                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
422                         tx_offload_mask.l2_len |= ~0;
423                         tx_offload_mask.l3_len |= ~0;
424                         break;
425                 case PKT_TX_TCP_CKSUM:
426                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                         mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
429                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
430                         tx_offload_mask.l2_len |= ~0;
431                         tx_offload_mask.l3_len |= ~0;
432                         break;
433                 case PKT_TX_SCTP_CKSUM:
434                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
435                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
436                         mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
437                                 << IXGBE_ADVTXD_L4LEN_SHIFT;
438                         tx_offload_mask.l2_len |= ~0;
439                         tx_offload_mask.l3_len |= ~0;
440                         break;
441                 default:
442                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
443                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
444                         break;
445                 }
446         }
447
448         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
449                 tx_offload_mask.outer_l2_len |= ~0;
450                 tx_offload_mask.outer_l3_len |= ~0;
451                 tx_offload_mask.l2_len |= ~0;
452                 seqnum_seed |= tx_offload.outer_l3_len
453                                << IXGBE_ADVTXD_OUTER_IPLEN;
454                 seqnum_seed |= tx_offload.l2_len
455                                << IXGBE_ADVTXD_TUNNEL_LEN;
456         }
457 #ifdef RTE_LIB_SECURITY
458         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
459                 union ixgbe_crypto_tx_desc_md *md =
460                                 (union ixgbe_crypto_tx_desc_md *)mdata;
461                 seqnum_seed |=
462                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
463                 type_tucmd_mlhl |= md->enc ?
464                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
465                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
466                 type_tucmd_mlhl |=
467                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
468                 tx_offload_mask.sa_idx |= ~0;
469                 tx_offload_mask.sec_pad_len |= ~0;
470         }
471 #endif
472
473         txq->ctx_cache[ctx_idx].flags = ol_flags;
474         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
475                 tx_offload_mask.data[0] & tx_offload.data[0];
476         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
477                 tx_offload_mask.data[1] & tx_offload.data[1];
478         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
479
480         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
481         vlan_macip_lens = tx_offload.l3_len;
482         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
483                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
484                                     IXGBE_ADVTXD_MACLEN_SHIFT);
485         else
486                 vlan_macip_lens |= (tx_offload.l2_len <<
487                                     IXGBE_ADVTXD_MACLEN_SHIFT);
488         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
489         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
490         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
491         ctx_txd->seqnum_seed     = seqnum_seed;
492 }
493
494 /*
495  * Check which hardware context can be used. Use the existing match
496  * or create a new context descriptor.
497  */
498 static inline uint32_t
499 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
500                    union ixgbe_tx_offload tx_offload)
501 {
502         /* Check for a match with the context currently in use */
503         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
504                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
505                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
506                      & tx_offload.data[0])) &&
507                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
508                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
509                      & tx_offload.data[1]))))
510                 return txq->ctx_curr;
511
512         /* Otherwise, check for a match with the other cached context */
513         txq->ctx_curr ^= 1;
514         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
515                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
516                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
517                      & tx_offload.data[0])) &&
518                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
519                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
520                      & tx_offload.data[1]))))
521                 return txq->ctx_curr;
522
523         /* No match: the caller must build a new context descriptor */
524         return IXGBE_CTX_NUM;
525 }
526
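/* Translate checksum/TSO offload flags into olinfo_status POPTS bits. */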
527 static inline uint32_t
528 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
529 {
530         uint32_t tmp = 0;
531
532         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
533                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
534         if (ol_flags & PKT_TX_IP_CKSUM)
535                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
536         if (ol_flags & PKT_TX_TCP_SEG)
537                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
538         return tmp;
539 }
540
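/*
 * Translate offload flags into cmd_type_len bits: VLAN insertion, TSO,
 * outer IP checksum and MACsec.
 */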
541 static inline uint32_t
542 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
543 {
544         uint32_t cmdtype = 0;
545
546         if (ol_flags & PKT_TX_VLAN_PKT)
547                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
548         if (ol_flags & PKT_TX_TCP_SEG)
549                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
550         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
551                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
552         if (ol_flags & PKT_TX_MACSEC)
553                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
554         return cmdtype;
555 }
556
557 /* Default RS bit threshold values */
558 #ifndef DEFAULT_TX_RS_THRESH
559 #define DEFAULT_TX_RS_THRESH   32
560 #endif
561 #ifndef DEFAULT_TX_FREE_THRESH
562 #define DEFAULT_TX_FREE_THRESH 32
563 #endif
564
565 /* Reset transmit descriptors after they have been used */
566 static inline int
567 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
568 {
569         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
570         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
571         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
572         uint16_t nb_tx_desc = txq->nb_tx_desc;
573         uint16_t desc_to_clean_to;
574         uint16_t nb_tx_to_clean;
575         uint32_t status;
576
577         /* Determine the last descriptor needing to be cleaned */
578         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
579         if (desc_to_clean_to >= nb_tx_desc)
580                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
581
582         /* Check to make sure the last descriptor to clean is done */
583         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
584         status = txr[desc_to_clean_to].wb.status;
585         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
586                 PMD_TX_FREE_LOG(DEBUG,
587                                 "TX descriptor %4u is not done "
588                                 "(port=%d queue=%d)",
589                                 desc_to_clean_to,
590                                 txq->port_id, txq->queue_id);
591                 /* Failed to clean any descriptors, better luck next time */
592                 return -(1);
593         }
594
595         /* Figure out how many descriptors will be cleaned */
596         if (last_desc_cleaned > desc_to_clean_to)
597                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
598                                                         desc_to_clean_to);
599         else
600                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
601                                                 last_desc_cleaned);
602
603         PMD_TX_FREE_LOG(DEBUG,
604                         "Cleaning %4u TX descriptors: %4u to %4u "
605                         "(port=%d queue=%d)",
606                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
607                         txq->port_id, txq->queue_id);
608
609         /*
610          * The last descriptor to clean is done, so that means all the
611          * descriptors from the last descriptor that was cleaned
612          * up to the last descriptor with the RS bit set
613          * are done. Only reset the threshold descriptor.
614          */
615         txr[desc_to_clean_to].wb.status = 0;
616
617         /* Update the txq to reflect the last descriptor that was cleaned */
618         txq->last_desc_cleaned = desc_to_clean_to;
619         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
620
621         /* No Error */
622         return 0;
623 }
624
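/*
 * Full-featured TX burst function: handles multi-segment mbufs and all
 * supported offloads, building (or reusing) a context descriptor per
 * packet as needed and cleaning the ring when it runs short of free
 * descriptors.
 */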
625 uint16_t
626 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
627                 uint16_t nb_pkts)
628 {
629         struct ixgbe_tx_queue *txq;
630         struct ixgbe_tx_entry *sw_ring;
631         struct ixgbe_tx_entry *txe, *txn;
632         volatile union ixgbe_adv_tx_desc *txr;
633         volatile union ixgbe_adv_tx_desc *txd, *txp;
634         struct rte_mbuf     *tx_pkt;
635         struct rte_mbuf     *m_seg;
636         uint64_t buf_dma_addr;
637         uint32_t olinfo_status;
638         uint32_t cmd_type_len;
639         uint32_t pkt_len;
640         uint16_t slen;
641         uint64_t ol_flags;
642         uint16_t tx_id;
643         uint16_t tx_last;
644         uint16_t nb_tx;
645         uint16_t nb_used;
646         uint64_t tx_ol_req;
647         uint32_t ctx = 0;
648         uint32_t new_ctx;
649         union ixgbe_tx_offload tx_offload;
650 #ifdef RTE_LIB_SECURITY
651         uint8_t use_ipsec;
652 #endif
653
654         tx_offload.data[0] = 0;
655         tx_offload.data[1] = 0;
656         txq = tx_queue;
657         sw_ring = txq->sw_ring;
658         txr     = txq->tx_ring;
659         tx_id   = txq->tx_tail;
660         txe = &sw_ring[tx_id];
661         txp = NULL;
662
663         /* Determine if the descriptor ring needs to be cleaned. */
664         if (txq->nb_tx_free < txq->tx_free_thresh)
665                 ixgbe_xmit_cleanup(txq);
666
667         rte_prefetch0(&txe->mbuf->pool);
668
669         /* TX loop */
670         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
671                 new_ctx = 0;
672                 tx_pkt = *tx_pkts++;
673                 pkt_len = tx_pkt->pkt_len;
674
675                 /*
676                  * Determine how many (if any) context descriptors
677                  * are needed for offload functionality.
678                  */
679                 ol_flags = tx_pkt->ol_flags;
680 #ifdef RTE_LIB_SECURITY
681                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
682 #endif
683
684                 /* If hardware offload required */
685                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
686                 if (tx_ol_req) {
687                         tx_offload.l2_len = tx_pkt->l2_len;
688                         tx_offload.l3_len = tx_pkt->l3_len;
689                         tx_offload.l4_len = tx_pkt->l4_len;
690                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
691                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
692                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
693                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
694 #ifdef RTE_LIB_SECURITY
695                         if (use_ipsec) {
696                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
697                                         (union ixgbe_crypto_tx_desc_md *)
698                                                 rte_security_dynfield(tx_pkt);
699                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
700                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
701                         }
702 #endif
703
704                         /* Decide whether a new context descriptor must be built or an existing one reused. */
705                         ctx = what_advctx_update(txq, tx_ol_req,
706                                 tx_offload);
707                         /* Only allocate a context descriptor if required */
708                         new_ctx = (ctx == IXGBE_CTX_NUM);
709                         ctx = txq->ctx_curr;
710                 }
711
712                 /*
713                  * Keep track of how many descriptors are used in this loop.
714                  * This will always be the number of segments + the number of
715                  * context descriptors required to transmit the packet.
716                  */
717                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
718
719                 if (txp != NULL &&
720                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
721                         /* set RS on the previous packet in the burst */
722                         txp->read.cmd_type_len |=
723                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
724
725                 /*
726                  * The number of descriptors that must be allocated for a
727                  * packet is the number of segments of that packet, plus 1
728                  * Context Descriptor for the hardware offload, if any.
729                  * Determine the last TX descriptor to allocate in the TX ring
730                  * for the packet, starting from the current position (tx_id)
731                  * in the ring.
732                  */
733                 tx_last = (uint16_t) (tx_id + nb_used - 1);
734
735                 /* Circular ring */
736                 if (tx_last >= txq->nb_tx_desc)
737                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
738
739                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
740                            " tx_first=%u tx_last=%u",
741                            (unsigned) txq->port_id,
742                            (unsigned) txq->queue_id,
743                            (unsigned) pkt_len,
744                            (unsigned) tx_id,
745                            (unsigned) tx_last);
746
747                 /*
748                  * Make sure there are enough TX descriptors available to
749                  * transmit the entire packet.
750                  * nb_used better be less than or equal to txq->tx_rs_thresh
751                  */
752                 if (nb_used > txq->nb_tx_free) {
753                         PMD_TX_FREE_LOG(DEBUG,
754                                         "Not enough free TX descriptors "
755                                         "nb_used=%4u nb_free=%4u "
756                                         "(port=%d queue=%d)",
757                                         nb_used, txq->nb_tx_free,
758                                         txq->port_id, txq->queue_id);
759
760                         if (ixgbe_xmit_cleanup(txq) != 0) {
761                                 /* Could not clean any descriptors */
762                                 if (nb_tx == 0)
763                                         return 0;
764                                 goto end_of_tx;
765                         }
766
767                         /* nb_used better be <= txq->tx_rs_thresh */
768                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
769                                 PMD_TX_FREE_LOG(DEBUG,
770                                         "The number of descriptors needed to "
771                                         "transmit the packet exceeds the "
772                                         "RS bit threshold. This will impact "
773                                         "performance. "
774                                         "nb_used=%4u nb_free=%4u "
775                                         "tx_rs_thresh=%4u. "
776                                         "(port=%d queue=%d)",
777                                         nb_used, txq->nb_tx_free,
778                                         txq->tx_rs_thresh,
779                                         txq->port_id, txq->queue_id);
780                                 /*
781                                  * Loop here until there are enough TX
782                                  * descriptors or until the ring cannot be
783                                  * cleaned.
784                                  */
785                                 while (nb_used > txq->nb_tx_free) {
786                                         if (ixgbe_xmit_cleanup(txq) != 0) {
787                                                 /*
788                                                  * Could not clean any
789                                                  * descriptors
790                                                  */
791                                                 if (nb_tx == 0)
792                                                         return 0;
793                                                 goto end_of_tx;
794                                         }
795                                 }
796                         }
797                 }
798
799                 /*
800                  * By now there are enough free TX descriptors to transmit
801                  * the packet.
802                  */
803
804                 /*
805                  * Set common flags of all TX Data Descriptors.
806                  *
807                  * The following bits must be set in all Data Descriptors:
808                  *   - IXGBE_ADVTXD_DTYP_DATA
809                  *   - IXGBE_ADVTXD_DCMD_DEXT
810                  *
811                  * The following bits must be set in the first Data Descriptor
812                  * and are ignored in the other ones:
813                  *   - IXGBE_ADVTXD_DCMD_IFCS
814                  *   - IXGBE_ADVTXD_MAC_1588
815                  *   - IXGBE_ADVTXD_DCMD_VLE
816                  *
817                  * The following bits must only be set in the last Data
818                  * Descriptor:
819                  *   - IXGBE_TXD_CMD_EOP
820                  *
821                  * The following bits can be set in any Data Descriptor, but
822                  * are only set in the last Data Descriptor:
823                  *   - IXGBE_TXD_CMD_RS
824                  */
825                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
826                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
827
828 #ifdef RTE_LIBRTE_IEEE1588
829                 if (ol_flags & PKT_TX_IEEE1588_TMST)
830                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
831 #endif
832
833                 olinfo_status = 0;
834                 if (tx_ol_req) {
835
836                         if (ol_flags & PKT_TX_TCP_SEG) {
837                                 /* when TSO is on, paylen in the descriptor is
838                                  * not the packet len but the TCP payload len */
839                                 pkt_len -= (tx_offload.l2_len +
840                                         tx_offload.l3_len + tx_offload.l4_len);
841                         }
842
843                         /*
844                          * Setup the TX Advanced Context Descriptor if required
845                          */
846                         if (new_ctx) {
847                                 volatile struct ixgbe_adv_tx_context_desc *
848                                     ctx_txd;
849
850                                 ctx_txd = (volatile struct
851                                     ixgbe_adv_tx_context_desc *)
852                                     &txr[tx_id];
853
854                                 txn = &sw_ring[txe->next_id];
855                                 rte_prefetch0(&txn->mbuf->pool);
856
857                                 if (txe->mbuf != NULL) {
858                                         rte_pktmbuf_free_seg(txe->mbuf);
859                                         txe->mbuf = NULL;
860                                 }
861
862                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
863                                         tx_offload,
864                                         rte_security_dynfield(tx_pkt));
865
866                                 txe->last_id = tx_last;
867                                 tx_id = txe->next_id;
868                                 txe = txn;
869                         }
870
871                         /*
872                  * Set up the TX Advanced Data Descriptor.
873                  * This path is taken whether a new context descriptor
874                  * was built or an existing one is reused.
875                          */
876                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
877                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
878                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
879                 }
880
881                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
882 #ifdef RTE_LIB_SECURITY
883                 if (use_ipsec)
884                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
885 #endif
886
887                 m_seg = tx_pkt;
888                 do {
889                         txd = &txr[tx_id];
890                         txn = &sw_ring[txe->next_id];
891                         rte_prefetch0(&txn->mbuf->pool);
892
893                         if (txe->mbuf != NULL)
894                                 rte_pktmbuf_free_seg(txe->mbuf);
895                         txe->mbuf = m_seg;
896
897                         /*
898                          * Set up Transmit Data Descriptor.
899                          */
900                         slen = m_seg->data_len;
901                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
902                         txd->read.buffer_addr =
903                                 rte_cpu_to_le_64(buf_dma_addr);
904                         txd->read.cmd_type_len =
905                                 rte_cpu_to_le_32(cmd_type_len | slen);
906                         txd->read.olinfo_status =
907                                 rte_cpu_to_le_32(olinfo_status);
908                         txe->last_id = tx_last;
909                         tx_id = txe->next_id;
910                         txe = txn;
911                         m_seg = m_seg->next;
912                 } while (m_seg != NULL);
913
914                 /*
915                  * The last packet data descriptor needs End Of Packet (EOP)
916                  */
917                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
918                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
919                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
920
921                 /* Set RS bit only on threshold packets' last descriptor */
922                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
923                         PMD_TX_FREE_LOG(DEBUG,
924                                         "Setting RS bit on TXD id="
925                                         "%4u (port=%d queue=%d)",
926                                         tx_last, txq->port_id, txq->queue_id);
927
928                         cmd_type_len |= IXGBE_TXD_CMD_RS;
929
930                         /* Update txq RS bit counters */
931                         txq->nb_tx_used = 0;
932                         txp = NULL;
933                 } else
934                         txp = txd;
935
936                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
937         }
938
939 end_of_tx:
940         /* set RS on last packet in the burst */
941         if (txp != NULL)
942                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
943
944         rte_wmb();
945
946         /*
947          * Set the Transmit Descriptor Tail (TDT)
948          */
949         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
950                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
951                    (unsigned) tx_id, (unsigned) nb_tx);
952         IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
953         txq->tx_tail = tx_id;
954
955         return nb_tx;
956 }
957
958 /*********************************************************************
959  *
960  *  TX prep functions
961  *
962  **********************************************************************/
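/*
 * Tx prepare callback: validate the segment count, offload flags and
 * minimum packet length, and fix up checksum fields via
 * rte_net_intel_cksum_prepare() before the burst is handed to the
 * TX function.
 */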
963 uint16_t
964 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
965 {
966         int i, ret;
967         uint64_t ol_flags;
968         struct rte_mbuf *m;
969         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
970
971         for (i = 0; i < nb_pkts; i++) {
972                 m = tx_pkts[i];
973                 ol_flags = m->ol_flags;
974
975                 /**
976                  * Check if packet meets requirements for number of segments
977                  *
978                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
979                  *       non-TSO
980                  */
981
982                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
983                         rte_errno = EINVAL;
984                         return i;
985                 }
986
987                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
988                         rte_errno = ENOTSUP;
989                         return i;
990                 }
991
992                 /* check the size of packet */
993                 if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
994                         rte_errno = EINVAL;
995                         return i;
996                 }
997
998 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
999                 ret = rte_validate_tx_offload(m);
1000                 if (ret != 0) {
1001                         rte_errno = -ret;
1002                         return i;
1003                 }
1004 #endif
1005                 ret = rte_net_intel_cksum_prepare(m);
1006                 if (ret != 0) {
1007                         rte_errno = -ret;
1008                         return i;
1009                 }
1010         }
1011
1012         return i;
1013 }
1014
1015 /*********************************************************************
1016  *
1017  *  RX functions
1018  *
1019  **********************************************************************/
1020
1021 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1022 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1023 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1024 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1025 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1030 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1031 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1032 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1033 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1034 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1035 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1036 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1037 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1038 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1039 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1040 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1041 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1042 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1043 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1044 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1045 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1046 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1047 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1048 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1049 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1050 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1051 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1052 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1053 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1054
1055 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1062 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1063 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1064 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1065 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1066 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1067 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1068 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1069 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1070 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1071 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1072 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1073 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1074 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1075 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1078
1079 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1086 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1087 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1088 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1089 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1090 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1091 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1092 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1093 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1094 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1095 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1096 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1097 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1098 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1099 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1102
1103 /**
1104  * Use two different tables for normal packets and tunnel packets
1105  * to save space.
1106  */
1107 const uint32_t
1108         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1109         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1110         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1111                 RTE_PTYPE_L3_IPV4,
1112         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1113                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1114         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1115                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1116         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1117                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1118         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1119                 RTE_PTYPE_L3_IPV4_EXT,
1120         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1121                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1122         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1123                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1124         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1125                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1126         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1127                 RTE_PTYPE_L3_IPV6,
1128         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1129                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1130         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1132         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1134         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV6_EXT,
1136         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1138         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1140         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1142         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                 RTE_PTYPE_INNER_L3_IPV6,
1145         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1146                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1148         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1151         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1152                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1153                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1154         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                 RTE_PTYPE_INNER_L3_IPV6,
1157         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1158                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1160         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1163         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1164                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1165                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1166         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1167                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1169         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1170                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1172         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1173                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1175         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1176                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1177                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1178         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1179                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1181         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1182                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1184         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1185                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1186                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1187         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1188                 RTE_PTYPE_L2_ETHER |
1189                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1190                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1191 };
1192
1193 const uint32_t
1194         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1195         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                 RTE_PTYPE_INNER_L2_ETHER,
1198         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1201         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1204         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1207         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1208                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1210         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1213         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1214                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1216         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1217                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1219                 RTE_PTYPE_INNER_L4_TCP,
1220         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1221                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1222                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1223                 RTE_PTYPE_INNER_L4_TCP,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1230                 RTE_PTYPE_INNER_L4_TCP,
1231         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1232                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1233                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1234                 RTE_PTYPE_INNER_L3_IPV4,
1235         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1236                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1238                 RTE_PTYPE_INNER_L4_UDP,
1239         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1242                 RTE_PTYPE_INNER_L4_UDP,
1243         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1244                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1245                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1246                 RTE_PTYPE_INNER_L4_SCTP,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1250         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1251                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1252                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1253                 RTE_PTYPE_INNER_L4_UDP,
1254         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1255                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1257                 RTE_PTYPE_INNER_L4_SCTP,
1258         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1259                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1260                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1261                 RTE_PTYPE_INNER_L3_IPV4,
1262         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1263                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1265                 RTE_PTYPE_INNER_L4_SCTP,
1266         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1267                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                 RTE_PTYPE_INNER_L4_SCTP,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1273                 RTE_PTYPE_INNER_L4_TCP,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1277                 RTE_PTYPE_INNER_L4_UDP,
1278
1279         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1280                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1282         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                 RTE_PTYPE_INNER_L3_IPV4,
1286         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1290         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                 RTE_PTYPE_INNER_L3_IPV6,
1294         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                 RTE_PTYPE_INNER_L3_IPV4,
1298         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV4,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1323                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1324                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1325                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV4,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1343                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1351                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1352                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1353                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1370 };
1371
1372 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1373 static inline uint32_t
1374 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1375 {
1376
1377         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1378                 return RTE_PTYPE_UNKNOWN;
1379
1380         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1381
1382         /* For tunnel packet */
1383         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1384                 /* Remove the tunnel bit to save space. */
1385                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1386                 return ptype_table_tn[pkt_info];
1387         }
1388
1389         /**
1390          * For x550, if the packet is not tunneled,
1391          * the tunnel type bit should be set to 0.
1392          * Reuse the 82599 mask.
1393          */
1394         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1395
1396         return ptype_table[pkt_info];
1397 }
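/*
 * Illustrative sketch (not used by the driver): how a caller could inspect
 * the packet type produced above.  "m" is a hypothetical received mbuf; the
 * RTE_PTYPE_*_MASK macros are part of the public rte_mbuf ptype API.
 *
 *	uint32_t ptype = m->packet_type;
 *
 *	if ((ptype & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRE ||
 *	    (ptype & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_VXLAN) {
 *		// tunneled: the tables above also report the inner layers
 *		uint32_t inner_l4 = ptype & RTE_PTYPE_INNER_L4_MASK;
 *		// ... dispatch on inner_l4 (TCP/UDP/SCTP)
 *	} else {
 *		uint32_t l3 = ptype & RTE_PTYPE_L3_MASK;
 *		// ... dispatch on the outer L3/L4 types
 *	}
 */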
1398
1399 static inline uint64_t
1400 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1401 {
1402         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1403                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1404                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1405                 PKT_RX_RSS_HASH, 0, 0, 0,
1406                 0, 0, 0,  PKT_RX_FDIR,
1407         };
1408 #ifdef RTE_LIBRTE_IEEE1588
1409         static uint64_t ip_pkt_etqf_map[8] = {
1410                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1411                 0, 0, 0, 0,
1412         };
1413
1414         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1415                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1416                                 ip_rss_types_map[pkt_info & 0XF];
1417         else
1418                 return ip_rss_types_map[pkt_info & 0XF];
1419 #else
1420         return ip_rss_types_map[pkt_info & 0XF];
1421 #endif
1422 }
1423
1424 static inline uint64_t
1425 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1426 {
1427         uint64_t pkt_flags;
1428
1429         /*
1430          * Check only whether a VLAN is present.
1431          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1432          * that can be found in the rte_eth_rxmode.offloads flags.
1433          */
1434         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1435
1436 #ifdef RTE_LIBRTE_IEEE1588
1437         if (rx_status & IXGBE_RXD_STAT_TMST)
1438                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1439 #endif
1440         return pkt_flags;
1441 }
1442
1443 static inline uint64_t
1444 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1445 {
1446         uint64_t pkt_flags;
1447
1448         /*
1449          * Bit 31: IPE, IPv4 checksum error
1450          * Bit 30: L4I, L4 integrity error
1451          */
1452         static uint64_t error_to_pkt_flags_map[4] = {
1453                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1454                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1455                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1456                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1457         };
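        /*
         * Index below: bit 0 is the L4 integrity error (bit 30 above),
         * bit 1 is the IPv4 checksum error (bit 31 above).
         */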
1458         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1459                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1460
1461         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1462             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1463                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1464         }
1465
1466 #ifdef RTE_LIB_SECURITY
1467         if (rx_status & IXGBE_RXD_STAT_SECP) {
1468                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1469                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1470                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1471         }
1472 #endif
1473
1474         return pkt_flags;
1475 }
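/*
 * Consumption sketch (application side, illustrative): the flags set above
 * are read back through the standard mbuf offload flags.  "m" is a
 * hypothetical received mbuf.
 *
 *	if ((m->ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD ||
 *	    (m->ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD)
 *		rte_pktmbuf_free(m);	// e.g. drop packets with a bad checksum
 */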
1476
1477 /*
1478  * LOOK_AHEAD defines how many desc statuses to check beyond the
1479  * current descriptor.
1480  * It must be a pound define for optimal performance.
1481  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1482  * function only works with LOOK_AHEAD=8.
1483  */
1484 #define LOOK_AHEAD 8
1485 #if (LOOK_AHEAD != 8)
1486 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1487 #endif
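/*
 * The scan loop below advances in LOOK_AHEAD-sized steps and checks at most
 * RTE_PMD_IXGBE_RX_MAX_BURST descriptors per invocation.
 */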
1488 static inline int
1489 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1490 {
1491         volatile union ixgbe_adv_rx_desc *rxdp;
1492         struct ixgbe_rx_entry *rxep;
1493         struct rte_mbuf *mb;
1494         uint16_t pkt_len;
1495         uint64_t pkt_flags;
1496         int nb_dd;
1497         uint32_t s[LOOK_AHEAD];
1498         uint32_t pkt_info[LOOK_AHEAD];
1499         int i, j, nb_rx = 0;
1500         uint32_t status;
1501         uint64_t vlan_flags = rxq->vlan_flags;
1502
1503         /* get references to current descriptor and S/W ring entry */
1504         rxdp = &rxq->rx_ring[rxq->rx_tail];
1505         rxep = &rxq->sw_ring[rxq->rx_tail];
1506
1507         status = rxdp->wb.upper.status_error;
1508         /* check to make sure there is at least 1 packet to receive */
1509         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1510                 return 0;
1511
1512         /*
1513          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1514          * reference packets that are ready to be received.
1515          */
1516         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1517              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1518                 /* Read desc statuses backwards to avoid race condition */
1519                 for (j = 0; j < LOOK_AHEAD; j++)
1520                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1521
1522                 rte_smp_rmb();
1523
1524                 /* Compute how many status bits were set */
1525                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1526                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1527                         ;
1528
1529                 for (j = 0; j < nb_dd; j++)
1530                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1531                                                        lo_dword.data);
1532
1533                 nb_rx += nb_dd;
1534
1535                 /* Translate descriptor info to mbuf format */
1536                 for (j = 0; j < nb_dd; ++j) {
1537                         mb = rxep[j].mbuf;
1538                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1539                                   rxq->crc_len;
1540                         mb->data_len = pkt_len;
1541                         mb->pkt_len = pkt_len;
1542                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1543
1544                         /* convert descriptor fields to rte mbuf flags */
1545                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1546                                 vlan_flags);
1547                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1548                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1549                                         ((uint16_t)pkt_info[j]);
1550                         mb->ol_flags = pkt_flags;
1551                         mb->packet_type =
1552                                 ixgbe_rxd_pkt_info_to_pkt_type
1553                                         (pkt_info[j], rxq->pkt_type_mask);
1554
1555                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1556                                 mb->hash.rss = rte_le_to_cpu_32(
1557                                     rxdp[j].wb.lower.hi_dword.rss);
1558                         else if (pkt_flags & PKT_RX_FDIR) {
1559                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1560                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1561                                     IXGBE_ATR_HASH_MASK;
1562                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1563                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1564                         }
1565                 }
1566
1567                 /* Move mbuf pointers from the S/W ring to the stage */
1568                 for (j = 0; j < LOOK_AHEAD; ++j) {
1569                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1570                 }
1571
1572                 /* stop scanning if fewer than LOOK_AHEAD descriptors were done */
1573                 if (nb_dd != LOOK_AHEAD)
1574                         break;
1575         }
1576
1577         /* clear software ring entries so we can cleanup correctly */
1578         for (i = 0; i < nb_rx; ++i) {
1579                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1580         }
1581
1582
1583         return nb_rx;
1584 }
1585
1586 static inline int
1587 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1588 {
1589         volatile union ixgbe_adv_rx_desc *rxdp;
1590         struct ixgbe_rx_entry *rxep;
1591         struct rte_mbuf *mb;
1592         uint16_t alloc_idx;
1593         __le64 dma_addr;
1594         int diag, i;
1595
1596         /* allocate buffers in bulk directly into the S/W ring */
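        /*
         * rx_free_trigger marks the last descriptor of the current refill
         * window, so the window of rx_free_thresh buffers starts
         * rx_free_thresh - 1 entries before it.
         */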
1597         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1598         rxep = &rxq->sw_ring[alloc_idx];
1599         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1600                                     rxq->rx_free_thresh);
1601         if (unlikely(diag != 0))
1602                 return -ENOMEM;
1603
1604         rxdp = &rxq->rx_ring[alloc_idx];
1605         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1606                 /* populate the static rte mbuf fields */
1607                 mb = rxep[i].mbuf;
1608                 if (reset_mbuf) {
1609                         mb->port = rxq->port_id;
1610                 }
1611
1612                 rte_mbuf_refcnt_set(mb, 1);
1613                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1614
1615                 /* populate the descriptors */
1616                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1617                 rxdp[i].read.hdr_addr = 0;
1618                 rxdp[i].read.pkt_addr = dma_addr;
1619         }
1620
1621         /* update state of internal queue structure */
1622         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1623         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1624                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1625
1626         /* no errors */
1627         return 0;
1628 }
1629
1630 static inline uint16_t
1631 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1632                          uint16_t nb_pkts)
1633 {
1634         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1635         int i;
1636
1637         /* how many packets are ready to return? */
1638         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1639
1640         /* copy mbuf pointers to the application's packet list */
1641         for (i = 0; i < nb_pkts; ++i)
1642                 rx_pkts[i] = stage[i];
1643
1644         /* update internal queue state */
1645         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1646         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1647
1648         return nb_pkts;
1649 }
1650
1651 static inline uint16_t
1652 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1653              uint16_t nb_pkts)
1654 {
1655         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1656         uint16_t nb_rx = 0;
1657
1658         /* Any previously recv'd pkts will be returned from the Rx stage */
1659         if (rxq->rx_nb_avail)
1660                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1661
1662         /* Scan the H/W ring for packets to receive */
1663         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1664
1665         /* update internal queue state */
1666         rxq->rx_next_avail = 0;
1667         rxq->rx_nb_avail = nb_rx;
1668         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1669
1670         /* if required, allocate new buffers to replenish descriptors */
1671         if (rxq->rx_tail > rxq->rx_free_trigger) {
1672                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1673
1674                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1675                         int i, j;
1676
1677                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1678                                    "queue_id=%u", (unsigned) rxq->port_id,
1679                                    (unsigned) rxq->queue_id);
1680
1681                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1682                                 rxq->rx_free_thresh;
1683
1684                         /*
1685                          * Need to rewind any previous receives if we cannot
1686                          * allocate new buffers to replenish the old ones.
1687                          */
1688                         rxq->rx_nb_avail = 0;
1689                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1690                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1691                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1692
1693                         return 0;
1694                 }
1695
1696                 /* update tail pointer */
1697                 rte_wmb();
1698                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1699                                             cur_free_trigger);
1700         }
1701
1702         if (rxq->rx_tail >= rxq->nb_rx_desc)
1703                 rxq->rx_tail = 0;
1704
1705         /* received any packets this loop? */
1706         if (rxq->rx_nb_avail)
1707                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1708
1709         return 0;
1710 }
1711
1712 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1713 uint16_t
1714 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1715                            uint16_t nb_pkts)
1716 {
1717         uint16_t nb_rx;
1718
1719         if (unlikely(nb_pkts == 0))
1720                 return 0;
1721
1722         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1723                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1724
1725         /* request is relatively large, chunk it up */
1726         nb_rx = 0;
1727         while (nb_pkts) {
1728                 uint16_t ret, n;
1729
1730                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1731                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1732                 nb_rx = (uint16_t)(nb_rx + ret);
1733                 nb_pkts = (uint16_t)(nb_pkts - ret);
1734                 if (ret < n)
1735                         break;
1736         }
1737
1738         return nb_rx;
1739 }
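/*
 * Usage sketch (application side, illustrative): the handler above is reached
 * through the generic burst API, and requests larger than
 * RTE_PMD_IXGBE_RX_MAX_BURST are simply chunked by the loop above.
 *
 *	struct rte_mbuf *pkts[64];
 *	uint16_t n = rte_eth_rx_burst(port_id, queue_id, pkts, 64);
 *	uint16_t k;
 *
 *	for (k = 0; k < n; k++)
 *		handle_packet(pkts[k]);	// handle_packet() is hypothetical
 */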
1740
1741 uint16_t
1742 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1743                 uint16_t nb_pkts)
1744 {
1745         struct ixgbe_rx_queue *rxq;
1746         volatile union ixgbe_adv_rx_desc *rx_ring;
1747         volatile union ixgbe_adv_rx_desc *rxdp;
1748         struct ixgbe_rx_entry *sw_ring;
1749         struct ixgbe_rx_entry *rxe;
1750         struct rte_mbuf *rxm;
1751         struct rte_mbuf *nmb;
1752         union ixgbe_adv_rx_desc rxd;
1753         uint64_t dma_addr;
1754         uint32_t staterr;
1755         uint32_t pkt_info;
1756         uint16_t pkt_len;
1757         uint16_t rx_id;
1758         uint16_t nb_rx;
1759         uint16_t nb_hold;
1760         uint64_t pkt_flags;
1761         uint64_t vlan_flags;
1762
1763         nb_rx = 0;
1764         nb_hold = 0;
1765         rxq = rx_queue;
1766         rx_id = rxq->rx_tail;
1767         rx_ring = rxq->rx_ring;
1768         sw_ring = rxq->sw_ring;
1769         vlan_flags = rxq->vlan_flags;
1770         while (nb_rx < nb_pkts) {
1771                 /*
1772                  * The order of operations here is important as the DD status
1773                  * bit must not be read after any other descriptor fields.
1774                  * rx_ring and rxdp are pointing to volatile data so the order
1775                  * of accesses cannot be reordered by the compiler. If they were
1776                  * not volatile, they could be reordered which could lead to
1777                  * using invalid descriptor fields when read from rxd.
1778                  */
1779                 rxdp = &rx_ring[rx_id];
1780                 staterr = rxdp->wb.upper.status_error;
1781                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1782                         break;
1783                 rxd = *rxdp;
1784
1785                 /*
1786                  * End of packet.
1787                  *
1788                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1789                  * is likely to be invalid and to be dropped by the various
1790                  * validation checks performed by the network stack.
1791                  *
1792                  * Allocate a new mbuf to replenish the RX ring descriptor.
1793                  * If the allocation fails:
1794                  *    - arrange for that RX descriptor to be the first one
1795                  *      being parsed the next time the receive function is
1796                  *      invoked [on the same queue].
1797                  *
1798                  *    - Stop parsing the RX ring and return immediately.
1799                  *
1800                  * This policy does not drop the packet received in the RX
1801                  * descriptor for which the allocation of a new mbuf failed.
1802                  * Thus, that packet can still be retrieved later, once
1803                  * mbufs have been freed in the meantime.
1804                  * As a side effect, holding RX descriptors instead of
1805                  * systematically giving them back to the NIC may lead to
1806                  * RX ring exhaustion situations.
1807                  * However, the NIC can gracefully prevent such situations
1808                  * from happening by sending specific "back-pressure" flow
1809                  * control frames to its peer(s).
1810                  */
1811                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1812                            "ext_err_stat=0x%08x pkt_len=%u",
1813                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1814                            (unsigned) rx_id, (unsigned) staterr,
1815                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1816
1817                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1818                 if (nmb == NULL) {
1819                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1820                                    "queue_id=%u", (unsigned) rxq->port_id,
1821                                    (unsigned) rxq->queue_id);
1822                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1823                         break;
1824                 }
1825
1826                 nb_hold++;
1827                 rxe = &sw_ring[rx_id];
1828                 rx_id++;
1829                 if (rx_id == rxq->nb_rx_desc)
1830                         rx_id = 0;
1831
1832                 /* Prefetch next mbuf while processing current one. */
1833                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1834
1835                 /*
1836                  * When next RX descriptor is on a cache-line boundary,
1837                  * prefetch the next 4 RX descriptors and the next 8 pointers
1838                  * to mbufs.
1839                  */
1840                 if ((rx_id & 0x3) == 0) {
1841                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1842                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1843                 }
1844
1845                 rxm = rxe->mbuf;
1846                 rxe->mbuf = nmb;
1847                 dma_addr =
1848                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1849                 rxdp->read.hdr_addr = 0;
1850                 rxdp->read.pkt_addr = dma_addr;
1851
1852                 /*
1853                  * Initialize the returned mbuf.
1854                  * 1) setup generic mbuf fields:
1855                  *    - number of segments,
1856                  *    - next segment,
1857                  *    - packet length,
1858                  *    - RX port identifier.
1859                  * 2) integrate hardware offload data, if any:
1860                  *    - RSS flag & hash,
1861                  *    - IP checksum flag,
1862                  *    - VLAN TCI, if any,
1863                  *    - error flags.
1864                  */
1865                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1866                                       rxq->crc_len);
1867                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1868                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1869                 rxm->nb_segs = 1;
1870                 rxm->next = NULL;
1871                 rxm->pkt_len = pkt_len;
1872                 rxm->data_len = pkt_len;
1873                 rxm->port = rxq->port_id;
1874
1875                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1876                 /* Only valid if PKT_RX_VLAN is set in pkt_flags */
1877                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1878
1879                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1880                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1881                 pkt_flags = pkt_flags |
1882                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1883                 rxm->ol_flags = pkt_flags;
1884                 rxm->packet_type =
1885                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1886                                                        rxq->pkt_type_mask);
1887
1888                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1889                         rxm->hash.rss = rte_le_to_cpu_32(
1890                                                 rxd.wb.lower.hi_dword.rss);
1891                 else if (pkt_flags & PKT_RX_FDIR) {
1892                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1893                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1894                                         IXGBE_ATR_HASH_MASK;
1895                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1896                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1897                 }
1898                 /*
1899                  * Store the mbuf address into the next entry of the array
1900                  * of returned packets.
1901                  */
1902                 rx_pkts[nb_rx++] = rxm;
1903         }
1904         rxq->rx_tail = rx_id;
1905
1906         /*
1907          * If the number of free RX descriptors is greater than the RX free
1908          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1909          * register.
1910          * Update the RDT with the value of the last processed RX descriptor
1911          * minus 1, to guarantee that the RDT register is never equal to the
1912          * RDH register, which creates a "full" ring situation from the
1913          * hardware point of view...
1914          */
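        /*
         * E.g. for a 128-descriptor ring: if rx_id has wrapped to 0, the RDT
         * is written as 127, keeping it one entry behind the next descriptor
         * that software will process.
         */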
1915         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1916         if (nb_hold > rxq->rx_free_thresh) {
1917                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1918                            "nb_hold=%u nb_rx=%u",
1919                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1920                            (unsigned) rx_id, (unsigned) nb_hold,
1921                            (unsigned) nb_rx);
1922                 rx_id = (uint16_t) ((rx_id == 0) ?
1923                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1924                 IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1925                 nb_hold = 0;
1926         }
1927         rxq->nb_rx_hold = nb_hold;
1928         return nb_rx;
1929 }
1930
1931 /**
1932  * Return a descriptor's RSC count (non-zero for an RSC descriptor).
1933  */
1934 static inline uint32_t
1935 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1936 {
1937         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1938                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1939 }
1940
1941 /**
1942  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1943  *
1944  * Fill the following info in the HEAD buffer of the Rx cluster:
1945  *    - RX port identifier
1946  *    - hardware offload data, if any:
1947  *      - RSS flag & hash
1948  *      - IP checksum flag
1949  *      - VLAN TCI, if any
1950  *      - error flags
1951  * @head HEAD of the packet cluster
1952  * @desc HW descriptor to get data from
1953  * @rxq Pointer to the Rx queue
1954  */
1955 static inline void
1956 ixgbe_fill_cluster_head_buf(
1957         struct rte_mbuf *head,
1958         union ixgbe_adv_rx_desc *desc,
1959         struct ixgbe_rx_queue *rxq,
1960         uint32_t staterr)
1961 {
1962         uint32_t pkt_info;
1963         uint64_t pkt_flags;
1964
1965         head->port = rxq->port_id;
1966
1967         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1968          * set in the pkt_flags field.
1969          */
1970         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1971         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1972         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1973         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1974         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1975         head->ol_flags = pkt_flags;
1976         head->packet_type =
1977                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1978
1979         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1980                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1981         else if (pkt_flags & PKT_RX_FDIR) {
1982                 head->hash.fdir.hash =
1983                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1984                                                           & IXGBE_ATR_HASH_MASK;
1985                 head->hash.fdir.id =
1986                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1987         }
1988 }
1989
1990 /**
1991  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1992  *
1993  * @rx_queue Rx queue handle
1994  * @rx_pkts table of received packets
1995  * @nb_pkts size of rx_pkts table
1996  * @bulk_alloc if TRUE, bulk allocation is used to refill the HW ring
1997  *
1998  * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
1999  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2000  *
2001  * We use the same logic as in the Linux and FreeBSD ixgbe drivers:
2002  * 1) When non-EOP RSC completion arrives:
2003  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2004  *       segment's data length.
2005  *    b) Set the "next" pointer of the current segment to point to the segment
2006  *       at the NEXTP index.
2007  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2008  *       in the sw_sc_ring.
2009  * 2) When EOP arrives we just update the cluster's total length and offload
2010  *    flags and deliver the cluster up to the upper layers. In our case - put it
2011  *    in the rx_pkts table.
2012  *
2013  * Returns the number of received packets/clusters (according to the "bulk
2014  * receive" interface).
2015  */
2016 static inline uint16_t
2017 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2018                     bool bulk_alloc)
2019 {
2020         struct ixgbe_rx_queue *rxq = rx_queue;
2021         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2022         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2023         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2024         uint16_t rx_id = rxq->rx_tail;
2025         uint16_t nb_rx = 0;
2026         uint16_t nb_hold = rxq->nb_rx_hold;
2027         uint16_t prev_id = rxq->rx_tail;
2028
2029         while (nb_rx < nb_pkts) {
2030                 bool eop;
2031                 struct ixgbe_rx_entry *rxe;
2032                 struct ixgbe_scattered_rx_entry *sc_entry;
2033                 struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2034                 struct ixgbe_rx_entry *next_rxe = NULL;
2035                 struct rte_mbuf *first_seg;
2036                 struct rte_mbuf *rxm;
2037                 struct rte_mbuf *nmb = NULL;
2038                 union ixgbe_adv_rx_desc rxd;
2039                 uint16_t data_len;
2040                 uint16_t next_id;
2041                 volatile union ixgbe_adv_rx_desc *rxdp;
2042                 uint32_t staterr;
2043
2044 next_desc:
2045                 /*
2046                  * The code in this whole file uses the volatile pointer to
2047                  * ensure the read ordering of the status and the rest of the
2048                  * descriptor fields (on the compiler level only!!!). This is so
2049          * UGLY - why not just use the compiler barrier instead? DPDK
2050                  * even has the rte_compiler_barrier() for that.
2051                  *
2052                  * But most importantly this is just wrong because this doesn't
2053                  * ensure memory ordering in a general case at all. For
2054                  * instance, DPDK is supposed to work on Power CPUs where
2055                  * compiler barrier may just not be enough!
2056                  *
2057                  * I tried to write only this function properly to have a
2058                  * starting point (as a part of an LRO/RSC series) but the
2059                  * compiler cursed at me when I tried to cast away the
2060                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2061                  * keeping it the way it is for now.
2062                  *
2063                  * The code in this file is broken in so many other places and
2064                  * will just not work on a big-endian CPU anyway, so the
2065                  * lines below will have to be revisited together with the rest
2066                  * of the ixgbe PMD.
2067                  *
2068                  * TODO:
2069                  *    - Get rid of "volatile" and let the compiler do its job.
2070                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2071                  *      memory ordering below.
2072                  */
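                /*
                 * Sketch of the TODO above (illustrative only, not enabled
                 * here): read the DD bit first, then use a read barrier
                 * before consuming the rest of the descriptor.
                 *
                 *	staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
                 *	if (!(staterr & IXGBE_RXDADV_STAT_DD))
                 *		break;
                 *	rte_rmb();
                 *	rxd = *rxdp;
                 */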
2073                 rxdp = &rx_ring[rx_id];
2074                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2075
2076                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2077                         break;
2078
2079                 rxd = *rxdp;
2080
2081                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2082                                   "staterr=0x%x data_len=%u",
2083                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2084                            rte_le_to_cpu_16(rxd.wb.upper.length));
2085
2086                 if (!bulk_alloc) {
2087                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2088                         if (nmb == NULL) {
2089                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2090                                                   "port_id=%u queue_id=%u",
2091                                            rxq->port_id, rxq->queue_id);
2092
2093                                 rte_eth_devices[rxq->port_id].data->
2094                                                         rx_mbuf_alloc_failed++;
2095                                 break;
2096                         }
2097                 } else if (nb_hold > rxq->rx_free_thresh) {
2098                         uint16_t next_rdt = rxq->rx_free_trigger;
2099
2100                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2101                                 rte_wmb();
2102                                 IXGBE_PCI_REG_WC_WRITE_RELAXED(
2103                                                         rxq->rdt_reg_addr,
2104                                                         next_rdt);
2105                                 nb_hold -= rxq->rx_free_thresh;
2106                         } else {
2107                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2108                                                   "port_id=%u queue_id=%u",
2109                                            rxq->port_id, rxq->queue_id);
2110
2111                                 rte_eth_devices[rxq->port_id].data->
2112                                                         rx_mbuf_alloc_failed++;
2113                                 break;
2114                         }
2115                 }
2116
2117                 nb_hold++;
2118                 rxe = &sw_ring[rx_id];
2119                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2120
2121                 next_id = rx_id + 1;
2122                 if (next_id == rxq->nb_rx_desc)
2123                         next_id = 0;
2124
2125                 /* Prefetch next mbuf while processing current one. */
2126                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2127
2128                 /*
2129                  * When next RX descriptor is on a cache-line boundary,
2130                  * prefetch the next 4 RX descriptors and the next 4 pointers
2131                  * to mbufs.
2132                  */
2133                 if ((next_id & 0x3) == 0) {
2134                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2135                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2136                 }
2137
2138                 rxm = rxe->mbuf;
2139
2140                 if (!bulk_alloc) {
2141                         __le64 dma =
2142                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2143                         /*
2144                          * Update RX descriptor with the physical address of the
2145                          * new data buffer of the newly allocated mbuf.
2146                          */
2147                         rxe->mbuf = nmb;
2148
2149                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2150                         rxdp->read.hdr_addr = 0;
2151                         rxdp->read.pkt_addr = dma;
2152                 } else
2153                         rxe->mbuf = NULL;
2154
2155                 /*
2156                  * Set data length & data buffer address of mbuf.
2157                  */
2158                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2159                 rxm->data_len = data_len;
2160
2161                 if (!eop) {
2162                         uint16_t nextp_id;
2163                         /*
2164                          * Get next descriptor index:
2165                          *  - For RSC it's in the NEXTP field.
2166                          *  - For a scattered packet - it's just a following
2167                          *    descriptor.
2168                          */
2169                         if (ixgbe_rsc_count(&rxd))
2170                                 nextp_id =
2171                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2172                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2173                         else
2174                                 nextp_id = next_id;
2175
2176                         next_sc_entry = &sw_sc_ring[nextp_id];
2177                         next_rxe = &sw_ring[nextp_id];
2178                         rte_ixgbe_prefetch(next_rxe);
2179                 }
2180
2181                 sc_entry = &sw_sc_ring[rx_id];
2182                 first_seg = sc_entry->fbuf;
2183                 sc_entry->fbuf = NULL;
2184
2185                 /*
2186                  * If this is the first buffer of the received packet,
2187                  * set the pointer to the first mbuf of the packet and
2188                  * initialize its context.
2189                  * Otherwise, update the total length and the number of segments
2190                  * of the current scattered packet, and update the pointer to
2191                  * the last mbuf of the current packet.
2192                  */
2193                 if (first_seg == NULL) {
2194                         first_seg = rxm;
2195                         first_seg->pkt_len = data_len;
2196                         first_seg->nb_segs = 1;
2197                 } else {
2198                         first_seg->pkt_len += data_len;
2199                         first_seg->nb_segs++;
2200                 }
2201
2202                 prev_id = rx_id;
2203                 rx_id = next_id;
2204
2205                 /*
2206                  * If this is not the last buffer of the received packet, update
2207                  * the pointer to the first mbuf at the NEXTP entry in the
2208                  * sw_sc_ring and continue to parse the RX ring.
2209                  */
2210                 if (!eop && next_rxe) {
2211                         rxm->next = next_rxe->mbuf;
2212                         next_sc_entry->fbuf = first_seg;
2213                         goto next_desc;
2214                 }
2215
2216                 /* Initialize the first mbuf of the returned packet */
2217                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2218
2219                 /*
2220                  * Deal with the case when HW CRC strip is disabled.
2221                  * That can't happen when LRO is enabled, but it still could
2222                  * happen in scattered RX mode.
2223                  */
2224                 first_seg->pkt_len -= rxq->crc_len;
2225                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2226                         struct rte_mbuf *lp;
2227
2228                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2229                                 ;
2230
2231                         first_seg->nb_segs--;
2232                         lp->data_len -= rxq->crc_len - rxm->data_len;
2233                         lp->next = NULL;
2234                         rte_pktmbuf_free_seg(rxm);
2235                 } else
2236                         rxm->data_len -= rxq->crc_len;
2237
2238                 /* Prefetch data of first segment, if configured to do so. */
2239                 rte_packet_prefetch((char *)first_seg->buf_addr +
2240                         first_seg->data_off);
2241
2242                 /*
2243                  * Store the mbuf address into the next entry of the array
2244                  * of returned packets.
2245                  */
2246                 rx_pkts[nb_rx++] = first_seg;
2247         }
2248
2249         /*
2250          * Record index of the next RX descriptor to probe.
2251          */
2252         rxq->rx_tail = rx_id;
2253
2254         /*
2255          * If the number of free RX descriptors is greater than the RX free
2256          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2257          * register.
2258          * Update the RDT with the value of the last processed RX descriptor
2259          * minus 1, to guarantee that the RDT register is never equal to the
2260          * RDH register, which creates a "full" ring situation from the
2261          * hardware point of view...
2262          */
2263         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2264                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2265                            "nb_hold=%u nb_rx=%u",
2266                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2267
2268                 rte_wmb();
2269                 IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2270                 nb_hold = 0;
2271         }
2272
2273         rxq->nb_rx_hold = nb_hold;
2274         return nb_rx;
2275 }
2276
2277 uint16_t
2278 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2279                                  uint16_t nb_pkts)
2280 {
2281         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2282 }
2283
2284 uint16_t
2285 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2286                                uint16_t nb_pkts)
2287 {
2288         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2289 }
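/*
 * Configuration sketch (application side, illustrative): the LRO receive
 * handlers above are only selected when the port is configured with the
 * DEV_RX_OFFLOAD_TCP_LRO Rx offload, along the lines of:
 *
 *	struct rte_eth_conf conf;
 *
 *	memset(&conf, 0, sizeof(conf));
 *	conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
 *	rte_eth_dev_configure(port_id, 1, 1, &conf);
 */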
2290
2291 /*********************************************************************
2292  *
2293  *  Queue management functions
2294  *
2295  **********************************************************************/
2296
2297 static void __rte_cold
2298 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2299 {
2300         unsigned i;
2301
2302         if (txq->sw_ring != NULL) {
2303                 for (i = 0; i < txq->nb_tx_desc; i++) {
2304                         if (txq->sw_ring[i].mbuf != NULL) {
2305                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2306                                 txq->sw_ring[i].mbuf = NULL;
2307                         }
2308                 }
2309         }
2310 }
2311
2312 static int
2313 ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2314 {
2315         struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2316         uint16_t i, tx_last, tx_id;
2317         uint16_t nb_tx_free_last;
2318         uint16_t nb_tx_to_clean;
2319         uint32_t pkt_cnt;
2320
2321         /* Start freeing mbufs from the entry after tx_tail */
2322         tx_last = txq->tx_tail;
2323         tx_id  = swr_ring[tx_last].next_id;
2324
2325         if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2326                 return 0;
2327
2328         nb_tx_to_clean = txq->nb_tx_free;
2329         nb_tx_free_last = txq->nb_tx_free;
2330         if (!free_cnt)
2331                 free_cnt = txq->nb_tx_desc;
2332
2333         /* Loop through swr_ring to count the number of
2334          * freeable mbufs and packets.
2335          */
2336         for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2337                 for (i = 0; i < nb_tx_to_clean &&
2338                         pkt_cnt < free_cnt &&
2339                         tx_id != tx_last; i++) {
2340                         if (swr_ring[tx_id].mbuf != NULL) {
2341                                 rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2342                                 swr_ring[tx_id].mbuf = NULL;
2343
2344                                 /*
2345                                  * last segment in the packet,
2346                                  * increment packet count
2347                                  */
2348                                 pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2349                         }
2350
2351                         tx_id = swr_ring[tx_id].next_id;
2352                 }
2353
2354                 if (txq->tx_rs_thresh > txq->nb_tx_desc -
2355                         txq->nb_tx_free || tx_id == tx_last)
2356                         break;
2357
2358                 if (pkt_cnt < free_cnt) {
2359                         if (ixgbe_xmit_cleanup(txq))
2360                                 break;
2361
2362                         nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2363                         nb_tx_free_last = txq->nb_tx_free;
2364                 }
2365         }
2366
2367         return (int)pkt_cnt;
2368 }
2369
2370 static int
2371 ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2372                         uint32_t free_cnt)
2373 {
2374         int i, n, cnt;
2375
2376         if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2377                 free_cnt = txq->nb_tx_desc;
2378
2379         cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2380
2381         for (i = 0; i < cnt; i += n) {
2382                 if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2383                         break;
2384
2385                 n = ixgbe_tx_free_bufs(txq);
2386
2387                 if (n == 0)
2388                         break;
2389         }
2390
2391         return i;
2392 }
2393
2394 static int
2395 ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2396                         uint32_t free_cnt __rte_unused)
2397 {
2398         return -ENOTSUP;
2399 }
2400
2401 int
2402 ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2403 {
2404         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2405         if (txq->offloads == 0 &&
2406 #ifdef RTE_LIB_SECURITY
2407                         !(txq->using_ipsec) &&
2408 #endif
2409                         txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2410                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2411                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2412                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2413                                         txq->sw_ring_v != NULL)) {
2414                         return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2415                 } else {
2416                         return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2417                 }
2418         }
2419
2420         return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2421 }
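/*
 * Usage sketch (application side, illustrative): this callback is normally
 * reached through the generic API, e.g. to reclaim up to 32 transmitted
 * mbufs on queue 0:
 *
 *	int freed = rte_eth_tx_done_cleanup(port_id, 0, 32);
 *
 * A negative return (e.g. -ENOTSUP from the vector path above) means the
 * queue could not be cleaned up this way.
 */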
2422
2423 static void __rte_cold
2424 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2425 {
2426         if (txq != NULL &&
2427             txq->sw_ring != NULL)
2428                 rte_free(txq->sw_ring);
2429 }
2430
2431 static void __rte_cold
2432 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2433 {
2434         if (txq != NULL && txq->ops != NULL) {
2435                 txq->ops->release_mbufs(txq);
2436                 txq->ops->free_swring(txq);
2437                 rte_free(txq);
2438         }
2439 }
2440
2441 void __rte_cold
2442 ixgbe_dev_tx_queue_release(void *txq)
2443 {
2444         ixgbe_tx_queue_release(txq);
2445 }
2446
2447 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2448 static void __rte_cold
2449 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2450 {
2451         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2452         struct ixgbe_tx_entry *txe = txq->sw_ring;
2453         uint16_t prev, i;
2454
2455         /* Zero out HW ring memory */
2456         for (i = 0; i < txq->nb_tx_desc; i++) {
2457                 txq->tx_ring[i] = zeroed_desc;
2458         }
2459
2460         /* Initialize SW ring entries */
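        /*
         * The entries are chained into a circular list through next_id;
         * last_id initially refers to the entry itself.
         */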
2461         prev = (uint16_t) (txq->nb_tx_desc - 1);
2462         for (i = 0; i < txq->nb_tx_desc; i++) {
2463                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2464
2465                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2466                 txe[i].mbuf = NULL;
2467                 txe[i].last_id = i;
2468                 txe[prev].next_id = i;
2469                 prev = i;
2470         }
2471
2472         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2473         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2474
2475         txq->tx_tail = 0;
2476         txq->nb_tx_used = 0;
2477         /*
2478          * Always leave one descriptor unused to avoid
2479          * a H/W race condition.
2480          */
2481         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2482         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2483         txq->ctx_curr = 0;
2484         memset((void *)&txq->ctx_cache, 0,
2485                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2486 }
2487
2488 static const struct ixgbe_txq_ops def_txq_ops = {
2489         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2490         .free_swring = ixgbe_tx_free_swring,
2491         .reset = ixgbe_reset_tx_queue,
2492 };
2493
2494 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2495  * the queue parameters. Used in tx_queue_setup by primary process and then
2496  * in dev_init by secondary process when attaching to an existing ethdev.
2497  */
2498 void __rte_cold
2499 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2500 {
2501         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2502         if ((txq->offloads == 0) &&
2503 #ifdef RTE_LIB_SECURITY
2504                         !(txq->using_ipsec) &&
2505 #endif
2506                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2507                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2508                 dev->tx_pkt_prepare = NULL;
2509                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2510                                 rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2511                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2512                                         ixgbe_txq_vec_setup(txq) == 0)) {
2513                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2514                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2515                 } else
2516                         dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2517         } else {
2518                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2519                 PMD_INIT_LOG(DEBUG,
2520                                 " - offloads = 0x%" PRIx64,
2521                                 txq->offloads);
2522                 PMD_INIT_LOG(DEBUG,
2523                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2524                                 (unsigned long)txq->tx_rs_thresh,
2525                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2526                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2527                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2528         }
2529 }
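
/*
 * A minimal sketch of a Tx queue configuration that makes the selection
 * above pick the vector path (assuming SIMD128 support is available;
 * all values are illustrative):
 *
 *     struct rte_eth_txconf txconf = {
 *             .offloads = 0,          // no Tx offloads -> simple path eligible
 *             .tx_rs_thresh = 32,     // >= RTE_PMD_IXGBE_TX_MAX_BURST and
 *                                     // <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ
 *             .tx_free_thresh = 32,
 *     };
 *
 * Any enabled Tx offload (or IPsec) instead selects ixgbe_xmit_pkts()
 * with ixgbe_prep_pkts() as the prepare callback.
 */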
2530
2531 uint64_t
2532 ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2533 {
2534         RTE_SET_USED(dev);
2535
2536         return 0;
2537 }
2538
2539 uint64_t
2540 ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2541 {
2542         uint64_t tx_offload_capa;
2543         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2544
2545         tx_offload_capa =
2546                 DEV_TX_OFFLOAD_VLAN_INSERT |
2547                 DEV_TX_OFFLOAD_IPV4_CKSUM  |
2548                 DEV_TX_OFFLOAD_UDP_CKSUM   |
2549                 DEV_TX_OFFLOAD_TCP_CKSUM   |
2550                 DEV_TX_OFFLOAD_SCTP_CKSUM  |
2551                 DEV_TX_OFFLOAD_TCP_TSO     |
2552                 DEV_TX_OFFLOAD_MULTI_SEGS;
2553
2554         if (hw->mac.type == ixgbe_mac_82599EB ||
2555             hw->mac.type == ixgbe_mac_X540)
2556                 tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2557
2558         if (hw->mac.type == ixgbe_mac_X550 ||
2559             hw->mac.type == ixgbe_mac_X550EM_x ||
2560             hw->mac.type == ixgbe_mac_X550EM_a)
2561                 tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2562
2563 #ifdef RTE_LIB_SECURITY
2564         if (dev->security_ctx)
2565                 tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2566 #endif
2567         return tx_offload_capa;
2568 }
2569
2570 int __rte_cold
2571 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2572                          uint16_t queue_idx,
2573                          uint16_t nb_desc,
2574                          unsigned int socket_id,
2575                          const struct rte_eth_txconf *tx_conf)
2576 {
2577         const struct rte_memzone *tz;
2578         struct ixgbe_tx_queue *txq;
2579         struct ixgbe_hw     *hw;
2580         uint16_t tx_rs_thresh, tx_free_thresh;
2581         uint64_t offloads;
2582
2583         PMD_INIT_FUNC_TRACE();
2584         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2585
2586         offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2587
2588         /*
2589          * Validate number of transmit descriptors.
2590          * It must not exceed hardware maximum, and must be multiple
2591          * It must not exceed the hardware maximum and must be a multiple
2592          */
2593         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2594                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2595                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2596                 return -EINVAL;
2597         }
2598
2599         /*
2600          * The following two parameters control the setting of the RS bit on
2601          * transmit descriptors.
2602          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2603          * descriptors have been used.
2604          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2605          * descriptors are used or if the number of descriptors required
2606          * to transmit a packet is greater than the number of free TX
2607          * descriptors.
2608          * The following constraints must be satisfied:
2609          *  tx_rs_thresh must be greater than 0.
2610          *  tx_rs_thresh must be less than the size of the ring minus 2.
2611          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2612          *  tx_rs_thresh must be a divisor of the ring size.
2613          *  tx_free_thresh must be greater than 0.
2614          *  tx_free_thresh must be less than the size of the ring minus 3.
2615          *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2616          * One descriptor in the TX ring is used as a sentinel to avoid a
2617          * H/W race condition, hence the maximum threshold constraints.
2618          * When set to zero use default values.
2619          */
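        /*
         * Worked example of the constraints above (illustrative): with
         * nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 64,
         * 32 > 0, 32 < 510, 32 <= 64, 512 % 32 == 0, 64 > 0, 64 < 509
         * and 32 + 64 <= 512, so the checks below all pass.
         */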
2620         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2621                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2622         /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2623         tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2624                         nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2625         if (tx_conf->tx_rs_thresh > 0)
2626                 tx_rs_thresh = tx_conf->tx_rs_thresh;
2627         if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2628                 PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2629                              "exceed nb_desc. (tx_rs_thresh=%u "
2630                              "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2631                              (unsigned int)tx_rs_thresh,
2632                              (unsigned int)tx_free_thresh,
2633                              (unsigned int)nb_desc,
2634                              (int)dev->data->port_id,
2635                              (int)queue_idx);
2636                 return -(EINVAL);
2637         }
2638         if (tx_rs_thresh >= (nb_desc - 2)) {
2639                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2640                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2641                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2642                         (int)dev->data->port_id, (int)queue_idx);
2643                 return -(EINVAL);
2644         }
2645         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2646                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2647                         "(tx_rs_thresh=%u port=%d queue=%d)",
2648                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2649                         (int)dev->data->port_id, (int)queue_idx);
2650                 return -(EINVAL);
2651         }
2652         if (tx_free_thresh >= (nb_desc - 3)) {
2653                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the number of "
2655                              "TX descriptors minus 3. (tx_free_thresh=%u "
2656                              "port=%d queue=%d)",
2657                              (unsigned int)tx_free_thresh,
2658                              (int)dev->data->port_id, (int)queue_idx);
2659                 return -(EINVAL);
2660         }
2661         if (tx_rs_thresh > tx_free_thresh) {
2662                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2663                              "tx_free_thresh. (tx_free_thresh=%u "
2664                              "tx_rs_thresh=%u port=%d queue=%d)",
2665                              (unsigned int)tx_free_thresh,
2666                              (unsigned int)tx_rs_thresh,
2667                              (int)dev->data->port_id,
2668                              (int)queue_idx);
2669                 return -(EINVAL);
2670         }
2671         if ((nb_desc % tx_rs_thresh) != 0) {
2672                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2673                              "number of TX descriptors. (tx_rs_thresh=%u "
2674                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2675                              (int)dev->data->port_id, (int)queue_idx);
2676                 return -(EINVAL);
2677         }
2678
2679         /*
2680          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2681          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2682          * by the NIC and all descriptors are written back after the NIC
2683          * accumulates WTHRESH descriptors.
2684          */
2685         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2686                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2687                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2688                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2689                              (int)dev->data->port_id, (int)queue_idx);
2690                 return -(EINVAL);
2691         }
2692
2693         /* Free memory prior to re-allocation if needed... */
2694         if (dev->data->tx_queues[queue_idx] != NULL) {
2695                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2696                 dev->data->tx_queues[queue_idx] = NULL;
2697         }
2698
2699         /* First allocate the tx queue data structure */
2700         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2701                                  RTE_CACHE_LINE_SIZE, socket_id);
2702         if (txq == NULL)
2703                 return -ENOMEM;
2704
2705         /*
2706          * Allocate TX ring hardware descriptors. A memzone large enough to
2707          * handle the maximum ring size is allocated in order to allow for
2708          * resizing in later calls to the queue setup function.
2709          */
2710         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2711                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2712                         IXGBE_ALIGN, socket_id);
2713         if (tz == NULL) {
2714                 ixgbe_tx_queue_release(txq);
2715                 return -ENOMEM;
2716         }
2717
2718         txq->nb_tx_desc = nb_desc;
2719         txq->tx_rs_thresh = tx_rs_thresh;
2720         txq->tx_free_thresh = tx_free_thresh;
2721         txq->pthresh = tx_conf->tx_thresh.pthresh;
2722         txq->hthresh = tx_conf->tx_thresh.hthresh;
2723         txq->wthresh = tx_conf->tx_thresh.wthresh;
2724         txq->queue_id = queue_idx;
2725         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2726                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2727         txq->port_id = dev->data->port_id;
2728         txq->offloads = offloads;
2729         txq->ops = &def_txq_ops;
2730         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2731 #ifdef RTE_LIB_SECURITY
2732         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2733                         DEV_TX_OFFLOAD_SECURITY);
2734 #endif
2735
2736         /*
2737          * Use the VF tail register (VFTDT) when running as a virtual function.
2738          */
2739         if (hw->mac.type == ixgbe_mac_82599_vf ||
2740             hw->mac.type == ixgbe_mac_X540_vf ||
2741             hw->mac.type == ixgbe_mac_X550_vf ||
2742             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2743             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2744                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2745         else
2746                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2747
2748         txq->tx_ring_phys_addr = tz->iova;
2749         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2750
2751         /* Allocate software ring */
2752         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2753                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2754                                 RTE_CACHE_LINE_SIZE, socket_id);
2755         if (txq->sw_ring == NULL) {
2756                 ixgbe_tx_queue_release(txq);
2757                 return -ENOMEM;
2758         }
2759         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2760                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2761
2762         /* set up vector or scalar TX function as appropriate */
2763         ixgbe_set_tx_function(dev, txq);
2764
2765         txq->ops->reset(txq);
2766
2767         dev->data->tx_queues[queue_idx] = txq;
2768
2769
2770         return 0;
2771 }
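
/*
 * Application-side sketch that drives the setup path above with thresholds
 * satisfying every check in this function ("port_id" and "sock" are
 * hypothetical values):
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 64,
 *             .tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 },
 *     };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512, sock, &txconf);
 *     if (ret != 0)
 *             rte_exit(EXIT_FAILURE, "tx queue setup failed: %d\n", ret);
 */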
2772
2773 /**
2774  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2775  *
2776  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2777  * in the sw_rsc_ring is not set to NULL but rather points to the next
2778  * mbuf of this RSC aggregation (that has not been completed yet and still
2779  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2780  * free only the first "nb_segs" segments of the cluster explicitly by calling
2781  * rte_pktmbuf_free_seg() on each of them.
2782  *
2783  * @m scattered cluster head
2784  */
2785 static void __rte_cold
2786 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2787 {
2788         uint16_t i, nb_segs = m->nb_segs;
2789         struct rte_mbuf *next_seg;
2790
2791         for (i = 0; i < nb_segs; i++) {
2792                 next_seg = m->next;
2793                 rte_pktmbuf_free_seg(m);
2794                 m = next_seg;
2795         }
2796 }
2797
2798 static void __rte_cold
2799 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2800 {
2801         unsigned i;
2802
2803         /* SSE Vector driver has a different way of releasing mbufs. */
2804         if (rxq->rx_using_sse) {
2805                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2806                 return;
2807         }
2808
2809         if (rxq->sw_ring != NULL) {
2810                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2811                         if (rxq->sw_ring[i].mbuf != NULL) {
2812                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2813                                 rxq->sw_ring[i].mbuf = NULL;
2814                         }
2815                 }
2816                 if (rxq->rx_nb_avail) {
2817                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2818                                 struct rte_mbuf *mb;
2819
2820                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2821                                 rte_pktmbuf_free_seg(mb);
2822                         }
2823                         rxq->rx_nb_avail = 0;
2824                 }
2825         }
2826
2827         if (rxq->sw_sc_ring)
2828                 for (i = 0; i < rxq->nb_rx_desc; i++)
2829                         if (rxq->sw_sc_ring[i].fbuf) {
2830                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2831                                 rxq->sw_sc_ring[i].fbuf = NULL;
2832                         }
2833 }
2834
2835 static void __rte_cold
2836 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2837 {
2838         if (rxq != NULL) {
2839                 ixgbe_rx_queue_release_mbufs(rxq);
2840                 rte_free(rxq->sw_ring);
2841                 rte_free(rxq->sw_sc_ring);
2842                 rte_free(rxq);
2843         }
2844 }
2845
2846 void __rte_cold
2847 ixgbe_dev_rx_queue_release(void *rxq)
2848 {
2849         ixgbe_rx_queue_release(rxq);
2850 }
2851
2852 /*
2853  * Check if Rx Burst Bulk Alloc function can be used.
2854  * Return
2855  *        0: the preconditions are satisfied and the bulk allocation function
2856  *           can be used.
2857  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2858  *           function must be used.
2859  */
2860 static inline int __rte_cold
2861 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2862 {
2863         int ret = 0;
2864
2865         /*
2866          * Make sure the following pre-conditions are satisfied:
2867          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2868          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2869          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2870          * Scattered packets are not supported.  This should be checked
2871          * outside of this function.
2872          */
2873         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2874                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2875                              "rxq->rx_free_thresh=%d, "
2876                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2877                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2878                 ret = -EINVAL;
2879         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2880                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2881                              "rxq->rx_free_thresh=%d, "
2882                              "rxq->nb_rx_desc=%d",
2883                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2884                 ret = -EINVAL;
2885         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2886                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2887                              "rxq->nb_rx_desc=%d, "
2888                              "rxq->rx_free_thresh=%d",
2889                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2890                 ret = -EINVAL;
2891         }
2892
2893         return ret;
2894 }
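
/*
 * Illustrative values that pass the preconditions above: nb_rx_desc = 512
 * with rx_free_thresh = 32 gives 32 >= RTE_PMD_IXGBE_RX_MAX_BURST,
 * 32 < 512 and 512 % 32 == 0, so the queue keeps
 * adapter->rx_bulk_alloc_allowed set.
 */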
2895
2896 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2897 static void __rte_cold
2898 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2899 {
2900         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2901         unsigned i;
2902         uint16_t len = rxq->nb_rx_desc;
2903
2904         /*
2905          * By default, the Rx queue setup function allocates enough memory for
2906          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2907          * extra memory at the end of the descriptor ring to be zero'd out.
2908          */
2909         if (adapter->rx_bulk_alloc_allowed)
2910                 /* zero out extra memory */
2911                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2912
2913         /*
2914          * Zero out HW ring memory. Zero out extra memory at the end of
2915          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2916          * reads extra memory as zeros.
2917          */
2918         for (i = 0; i < len; i++) {
2919                 rxq->rx_ring[i] = zeroed_desc;
2920         }
2921
2922         /*
2923          * initialize extra software ring entries. Space for these extra
2924          * entries is always allocated
2925          */
2926         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2927         for (i = rxq->nb_rx_desc; i < len; ++i) {
2928                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2929         }
2930
2931         rxq->rx_nb_avail = 0;
2932         rxq->rx_next_avail = 0;
2933         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2934         rxq->rx_tail = 0;
2935         rxq->nb_rx_hold = 0;
2936         rxq->pkt_first_seg = NULL;
2937         rxq->pkt_last_seg = NULL;
2938
2939 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2940         rxq->rxrearm_start = 0;
2941         rxq->rxrearm_nb = 0;
2942 #endif
2943 }
2944
2945 static int
2946 ixgbe_is_vf(struct rte_eth_dev *dev)
2947 {
2948         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2949
2950         switch (hw->mac.type) {
2951         case ixgbe_mac_82599_vf:
2952         case ixgbe_mac_X540_vf:
2953         case ixgbe_mac_X550_vf:
2954         case ixgbe_mac_X550EM_x_vf:
2955         case ixgbe_mac_X550EM_a_vf:
2956                 return 1;
2957         default:
2958                 return 0;
2959         }
2960 }
2961
2962 uint64_t
2963 ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2964 {
2965         uint64_t offloads = 0;
2966         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2967
2968         if (hw->mac.type != ixgbe_mac_82598EB)
2969                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2970
2971         return offloads;
2972 }
2973
2974 uint64_t
2975 ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2976 {
2977         uint64_t offloads;
2978         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2979
2980         offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2981                    DEV_RX_OFFLOAD_UDP_CKSUM   |
2982                    DEV_RX_OFFLOAD_TCP_CKSUM   |
2983                    DEV_RX_OFFLOAD_KEEP_CRC    |
2984                    DEV_RX_OFFLOAD_JUMBO_FRAME |
2985                    DEV_RX_OFFLOAD_VLAN_FILTER |
2986                    DEV_RX_OFFLOAD_SCATTER |
2987                    DEV_RX_OFFLOAD_RSS_HASH;
2988
2989         if (hw->mac.type == ixgbe_mac_82598EB)
2990                 offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2991
2992         if (ixgbe_is_vf(dev) == 0)
2993                 offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2994
2995         /*
2996          * RSC is only supported by 82599 and x540 PF devices in a non-SR-IOV
2997          * mode.
2998          */
2999         if ((hw->mac.type == ixgbe_mac_82599EB ||
3000              hw->mac.type == ixgbe_mac_X540 ||
3001              hw->mac.type == ixgbe_mac_X550) &&
3002             !RTE_ETH_DEV_SRIOV(dev).active)
3003                 offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3004
3005         if (hw->mac.type == ixgbe_mac_82599EB ||
3006             hw->mac.type == ixgbe_mac_X540)
3007                 offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3008
3009         if (hw->mac.type == ixgbe_mac_X550 ||
3010             hw->mac.type == ixgbe_mac_X550EM_x ||
3011             hw->mac.type == ixgbe_mac_X550EM_a)
3012                 offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3013
3014 #ifdef RTE_LIB_SECURITY
3015         if (dev->security_ctx)
3016                 offloads |= DEV_RX_OFFLOAD_SECURITY;
3017 #endif
3018
3019         return offloads;
3020 }
3021
3022 int __rte_cold
3023 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3024                          uint16_t queue_idx,
3025                          uint16_t nb_desc,
3026                          unsigned int socket_id,
3027                          const struct rte_eth_rxconf *rx_conf,
3028                          struct rte_mempool *mp)
3029 {
3030         const struct rte_memzone *rz;
3031         struct ixgbe_rx_queue *rxq;
3032         struct ixgbe_hw     *hw;
3033         uint16_t len;
3034         struct ixgbe_adapter *adapter = dev->data->dev_private;
3035         uint64_t offloads;
3036
3037         PMD_INIT_FUNC_TRACE();
3038         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3039
3040         offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3041
3042         /*
3043          * Validate number of receive descriptors.
3044          * It must not exceed hardware maximum, and must be multiple
3045          * It must not exceed the hardware maximum and must be a multiple
3046          */
3047         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3048                         (nb_desc > IXGBE_MAX_RING_DESC) ||
3049                         (nb_desc < IXGBE_MIN_RING_DESC)) {
3050                 return -EINVAL;
3051         }
3052
3053         /* Free memory prior to re-allocation if needed... */
3054         if (dev->data->rx_queues[queue_idx] != NULL) {
3055                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3056                 dev->data->rx_queues[queue_idx] = NULL;
3057         }
3058
3059         /* First allocate the rx queue data structure */
3060         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3061                                  RTE_CACHE_LINE_SIZE, socket_id);
3062         if (rxq == NULL)
3063                 return -ENOMEM;
3064         rxq->mb_pool = mp;
3065         rxq->nb_rx_desc = nb_desc;
3066         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3067         rxq->queue_id = queue_idx;
3068         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3069                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3070         rxq->port_id = dev->data->port_id;
3071         if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3072                 rxq->crc_len = RTE_ETHER_CRC_LEN;
3073         else
3074                 rxq->crc_len = 0;
3075         rxq->drop_en = rx_conf->rx_drop_en;
3076         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3077         rxq->offloads = offloads;
3078
3079         /*
3080          * The packet type field in the Rx descriptor differs between NICs.
3081          * Some bits are used by x550 but reserved on other NICs,
3082          * so set a different mask per NIC family.
3083          */
3084         if (hw->mac.type == ixgbe_mac_X550 ||
3085             hw->mac.type == ixgbe_mac_X550EM_x ||
3086             hw->mac.type == ixgbe_mac_X550EM_a ||
3087             hw->mac.type == ixgbe_mac_X550_vf ||
3088             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3089             hw->mac.type == ixgbe_mac_X550EM_a_vf)
3090                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3091         else
3092                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3093
3094         /*
3095          * Allocate RX ring hardware descriptors. A memzone large enough to
3096          * handle the maximum ring size is allocated in order to allow for
3097          * resizing in later calls to the queue setup function.
3098          */
3099         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3100                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
3101         if (rz == NULL) {
3102                 ixgbe_rx_queue_release(rxq);
3103                 return -ENOMEM;
3104         }
3105
3106         /*
3107          * Zero init all the descriptors in the ring.
3108          */
3109         memset(rz->addr, 0, RX_RING_SZ);
3110
3111         /*
3112          * Use the VF tail/head registers (VFRDT/VFRDH) when running as a virtual function.
3113          */
3114         if (hw->mac.type == ixgbe_mac_82599_vf ||
3115             hw->mac.type == ixgbe_mac_X540_vf ||
3116             hw->mac.type == ixgbe_mac_X550_vf ||
3117             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3118             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3119                 rxq->rdt_reg_addr =
3120                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3121                 rxq->rdh_reg_addr =
3122                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3123         } else {
3124                 rxq->rdt_reg_addr =
3125                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3126                 rxq->rdh_reg_addr =
3127                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3128         }
3129
3130         rxq->rx_ring_phys_addr = rz->iova;
3131         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3132
3133         /*
3134          * Certain constraints must be met in order to use the bulk buffer
3135          * allocation Rx burst function. If any Rx queue doesn't meet them,
3136          * the feature is disabled for the whole port.
3137          */
3138         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3139                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3140                                     "preconditions - canceling the feature for "
3141                                     "the whole port[%d]",
3142                              rxq->queue_id, rxq->port_id);
3143                 adapter->rx_bulk_alloc_allowed = false;
3144         }
3145
3146         /*
3147          * Allocate software ring. Allow for space at the end of the
3148          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3149          * function does not access an invalid memory region.
3150          */
3151         len = nb_desc;
3152         if (adapter->rx_bulk_alloc_allowed)
3153                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
3154
3155         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3156                                           sizeof(struct ixgbe_rx_entry) * len,
3157                                           RTE_CACHE_LINE_SIZE, socket_id);
3158         if (!rxq->sw_ring) {
3159                 ixgbe_rx_queue_release(rxq);
3160                 return -ENOMEM;
3161         }
3162
3163         /*
3164          * Always allocate even if it's not going to be needed in order to
3165          * simplify the code.
3166          *
3167          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3168          * be requested in ixgbe_dev_rx_init(), which is called later from
3169          * dev_start() flow.
3170          */
3171         rxq->sw_sc_ring =
3172                 rte_zmalloc_socket("rxq->sw_sc_ring",
3173                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
3174                                    RTE_CACHE_LINE_SIZE, socket_id);
3175         if (!rxq->sw_sc_ring) {
3176                 ixgbe_rx_queue_release(rxq);
3177                 return -ENOMEM;
3178         }
3179
3180         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3181                             "dma_addr=0x%"PRIx64,
3182                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3183                      rxq->rx_ring_phys_addr);
3184
3185         if (!rte_is_power_of_2(nb_desc)) {
3186                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3187                                     "preconditions - canceling the feature for "
3188                                     "the whole port[%d]",
3189                              rxq->queue_id, rxq->port_id);
3190                 adapter->rx_vec_allowed = false;
3191         } else
3192                 ixgbe_rxq_vec_setup(rxq);
3193
3194         dev->data->rx_queues[queue_idx] = rxq;
3195
3196         ixgbe_reset_rx_queue(adapter, rxq);
3197
3198         return 0;
3199 }
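
/*
 * Application-side sketch that exercises the Rx setup path above
 * ("port_id", "sock" and the pool sizing are hypothetical values):
 *
 *     struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192, 256,
 *                     0, RTE_MBUF_DEFAULT_BUF_SIZE, sock);
 *     struct rte_eth_rxconf rxconf = {
 *             .rx_free_thresh = 32,   // keeps bulk-alloc/vector Rx eligible
 *     };
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512, sock, &rxconf, mp);
 *     if (ret != 0)
 *             rte_exit(EXIT_FAILURE, "rx queue setup failed: %d\n", ret);
 */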
3200
3201 uint32_t
3202 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3203 {
3204 #define IXGBE_RXQ_SCAN_INTERVAL 4
3205         volatile union ixgbe_adv_rx_desc *rxdp;
3206         struct ixgbe_rx_queue *rxq;
3207         uint32_t desc = 0;
3208
3209         rxq = dev->data->rx_queues[rx_queue_id];
3210         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3211
3212         while ((desc < rxq->nb_rx_desc) &&
3213                 (rxdp->wb.upper.status_error &
3214                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3215                 desc += IXGBE_RXQ_SCAN_INTERVAL;
3216                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3217                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3218                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
3219                                 desc - rxq->nb_rx_desc]);
3220         }
3221
3222         return desc;
3223 }
3224
3225 int
3226 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3227 {
3228         volatile union ixgbe_adv_rx_desc *rxdp;
3229         struct ixgbe_rx_queue *rxq = rx_queue;
3230         uint32_t desc;
3231
3232         if (unlikely(offset >= rxq->nb_rx_desc))
3233                 return 0;
3234         desc = rxq->rx_tail + offset;
3235         if (desc >= rxq->nb_rx_desc)
3236                 desc -= rxq->nb_rx_desc;
3237
3238         rxdp = &rxq->rx_ring[desc];
3239         return !!(rxdp->wb.upper.status_error &
3240                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3241 }
3242
3243 int
3244 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3245 {
3246         struct ixgbe_rx_queue *rxq = rx_queue;
3247         volatile uint32_t *status;
3248         uint32_t nb_hold, desc;
3249
3250         if (unlikely(offset >= rxq->nb_rx_desc))
3251                 return -EINVAL;
3252
3253 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3254         if (rxq->rx_using_sse)
3255                 nb_hold = rxq->rxrearm_nb;
3256         else
3257 #endif
3258                 nb_hold = rxq->nb_rx_hold;
3259         if (offset >= rxq->nb_rx_desc - nb_hold)
3260                 return RTE_ETH_RX_DESC_UNAVAIL;
3261
3262         desc = rxq->rx_tail + offset;
3263         if (desc >= rxq->nb_rx_desc)
3264                 desc -= rxq->nb_rx_desc;
3265
3266         status = &rxq->rx_ring[desc].wb.upper.status_error;
3267         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3268                 return RTE_ETH_RX_DESC_DONE;
3269
3270         return RTE_ETH_RX_DESC_AVAIL;
3271 }
3272
3273 int
3274 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3275 {
3276         struct ixgbe_tx_queue *txq = tx_queue;
3277         volatile uint32_t *status;
3278         uint32_t desc;
3279
3280         if (unlikely(offset >= txq->nb_tx_desc))
3281                 return -EINVAL;
3282
3283         desc = txq->tx_tail + offset;
3284         /* go to next desc that has the RS bit */
3285         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3286                 txq->tx_rs_thresh;
3287         if (desc >= txq->nb_tx_desc) {
3288                 desc -= txq->nb_tx_desc;
3289                 if (desc >= txq->nb_tx_desc)
3290                         desc -= txq->nb_tx_desc;
3291         }
3292
3293         status = &txq->tx_ring[desc].wb.status;
3294         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3295                 return RTE_ETH_TX_DESC_DONE;
3296
3297         return RTE_ETH_TX_DESC_FULL;
3298 }
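
/*
 * Sketch of how an application might poll these helpers through the
 * generic API (illustrative; "port_id", "queue_id" and "offset" are
 * placeholders):
 *
 *     if (rte_eth_rx_descriptor_status(port_id, queue_id, 0) ==
 *                     RTE_ETH_RX_DESC_DONE)
 *             ;       // a packet is ready at the current Rx tail
 *     if (rte_eth_tx_descriptor_status(port_id, queue_id, offset) ==
 *                     RTE_ETH_TX_DESC_DONE)
 *             ;       // the descriptor "offset" entries ahead was written back
 */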
3299
3300 /*
3301  * Set up link loopback for X540/X550 mode Tx->Rx.
3302  */
3303 static inline void __rte_cold
3304 ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3305 {
3306         uint32_t macc;
3307         PMD_INIT_FUNC_TRACE();
3308
3309         u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3310
3311         hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3312                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3313         macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3314
3315         if (enable) {
3316                 /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3317                 autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3318                 /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3319                 macc |= IXGBE_MACC_FLU;
3320         } else {
3321                 autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3322                 macc &= ~IXGBE_MACC_FLU;
3323         }
3324
3325         hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3326                               IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3327
3328         IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3329 }
3330
3331 void __rte_cold
3332 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3333 {
3334         unsigned i;
3335         struct ixgbe_adapter *adapter = dev->data->dev_private;
3336         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3337
3338         PMD_INIT_FUNC_TRACE();
3339
3340         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3341                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3342
3343                 if (txq != NULL) {
3344                         txq->ops->release_mbufs(txq);
3345                         txq->ops->reset(txq);
3346                 }
3347         }
3348
3349         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3350                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3351
3352                 if (rxq != NULL) {
3353                         ixgbe_rx_queue_release_mbufs(rxq);
3354                         ixgbe_reset_rx_queue(adapter, rxq);
3355                 }
3356         }
3357         /* If loopback mode was enabled, reconfigure the link accordingly */
3358         if (dev->data->dev_conf.lpbk_mode != 0) {
3359                 if (hw->mac.type == ixgbe_mac_X540 ||
3360                      hw->mac.type == ixgbe_mac_X550 ||
3361                      hw->mac.type == ixgbe_mac_X550EM_x ||
3362                      hw->mac.type == ixgbe_mac_X550EM_a)
3363                         ixgbe_setup_loopback_link_x540_x550(hw, false);
3364         }
3365 }
3366
3367 void
3368 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3369 {
3370         unsigned i;
3371
3372         PMD_INIT_FUNC_TRACE();
3373
3374         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3375                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3376                 dev->data->rx_queues[i] = NULL;
3377                 rte_eth_dma_zone_free(dev, "rx_ring", i);
3378         }
3379         dev->data->nb_rx_queues = 0;
3380
3381         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3382                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3383                 dev->data->tx_queues[i] = NULL;
3384                 rte_eth_dma_zone_free(dev, "tx_ring", i);
3385         }
3386         dev->data->nb_tx_queues = 0;
3387 }
3388
3389 /*********************************************************************
3390  *
3391  *  Device RX/TX init functions
3392  *
3393  **********************************************************************/
3394
3395 /**
3396  * Receive Side Scaling (RSS)
3397  * See section 7.1.2.8 in the following document:
3398  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3399  *
3400  * Principles:
3401  * The source and destination IP addresses of the IP header and the source
3402  * and destination ports of TCP/UDP headers, if any, of received packets are
3403  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3404  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3405  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3406  * RSS output index which is used as the RX queue index where to store the
3407  * received packets.
3408  * The following output is supplied in the RX write-back descriptor:
3409  *     - 32-bit result of the Microsoft RSS hash function,
3410  *     - 4-bit RSS type field.
3411  */
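
/*
 * Worked example (illustrative): for a packet whose 32-bit RSS hash is
 * 0x1234ABCD, the RETA index is the 7 LSBs, 0x1234ABCD & 0x7F = 0x4D (77);
 * if software programmed RETA entry 77 with the value 3, the packet is
 * steered to Rx queue 3 and mbuf->hash.rss carries 0x1234ABCD.
 */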
3412
3413 /*
3414  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3415  * Used as the default key.
3416  */
3417 static uint8_t rss_intel_key[40] = {
3418         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3419         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3420         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3421         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3422         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3423 };
3424
3425 static void
3426 ixgbe_rss_disable(struct rte_eth_dev *dev)
3427 {
3428         struct ixgbe_hw *hw;
3429         uint32_t mrqc;
3430         uint32_t mrqc_reg;
3431
3432         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3433         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3434         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3435         mrqc &= ~IXGBE_MRQC_RSSEN;
3436         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3437 }
3438
3439 static void
3440 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3441 {
3442         uint8_t  *hash_key;
3443         uint32_t mrqc;
3444         uint32_t rss_key;
3445         uint64_t rss_hf;
3446         uint16_t i;
3447         uint32_t mrqc_reg;
3448         uint32_t rssrk_reg;
3449
3450         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3451         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3452
3453         hash_key = rss_conf->rss_key;
3454         if (hash_key != NULL) {
3455                 /* Fill in RSS hash key */
3456                 for (i = 0; i < 10; i++) {
3457                         rss_key  = hash_key[(i * 4)];
3458                         rss_key |= hash_key[(i * 4) + 1] << 8;
3459                         rss_key |= hash_key[(i * 4) + 2] << 16;
3460                         rss_key |= hash_key[(i * 4) + 3] << 24;
3461                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3462                 }
3463         }
3464
3465         /* Set configured hashing protocols in MRQC register */
3466         rss_hf = rss_conf->rss_hf;
3467         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3468         if (rss_hf & ETH_RSS_IPV4)
3469                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3470         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3471                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3472         if (rss_hf & ETH_RSS_IPV6)
3473                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3474         if (rss_hf & ETH_RSS_IPV6_EX)
3475                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3476         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3477                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3478         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3479                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3480         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3481                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3482         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3483                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3484         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3485                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3486         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3487 }
3488
3489 int
3490 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3491                           struct rte_eth_rss_conf *rss_conf)
3492 {
3493         struct ixgbe_hw *hw;
3494         uint32_t mrqc;
3495         uint64_t rss_hf;
3496         uint32_t mrqc_reg;
3497
3498         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3499
3500         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3501                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3502                         "NIC.");
3503                 return -ENOTSUP;
3504         }
3505         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3506
3507         /*
3508          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3509          *     "RSS enabling cannot be done dynamically while it must be
3510          *      preceded by a software reset"
3511          * Before changing anything, first check that the update RSS operation
3512          * does not attempt to disable RSS, if RSS was enabled at
3513          * initialization time, or does not attempt to enable RSS, if RSS was
3514          * disabled at initialization time.
3515          */
3516         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3517         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3518         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3519                 if (rss_hf != 0) /* Enable RSS */
3520                         return -(EINVAL);
3521                 return 0; /* Nothing to do */
3522         }
3523         /* RSS enabled */
3524         if (rss_hf == 0) /* Disable RSS */
3525                 return -(EINVAL);
3526         ixgbe_hw_rss_hash_set(hw, rss_conf);
3527         return 0;
3528 }
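
/*
 * Minimal runtime-update sketch (assumes RSS was already enabled at
 * initialization, as required by the check above; "port_id" is a
 * placeholder):
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,        // keep the currently programmed key
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *     // -EINVAL if the call would toggle RSS on/off, -ENOTSUP if the MAC
 *     // does not support runtime updates
 */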
3529
3530 int
3531 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3532                             struct rte_eth_rss_conf *rss_conf)
3533 {
3534         struct ixgbe_hw *hw;
3535         uint8_t *hash_key;
3536         uint32_t mrqc;
3537         uint32_t rss_key;
3538         uint64_t rss_hf;
3539         uint16_t i;
3540         uint32_t mrqc_reg;
3541         uint32_t rssrk_reg;
3542
3543         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3544         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3545         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3546         hash_key = rss_conf->rss_key;
3547         if (hash_key != NULL) {
3548                 /* Return RSS hash key */
3549                 for (i = 0; i < 10; i++) {
3550                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3551                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3552                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3553                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3554                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3555                 }
3556         }
3557
3558         /* Get RSS functions configured in MRQC register */
3559         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3560         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3561                 rss_conf->rss_hf = 0;
3562                 return 0;
3563         }
3564         rss_hf = 0;
3565         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3566                 rss_hf |= ETH_RSS_IPV4;
3567         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3568                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3569         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3570                 rss_hf |= ETH_RSS_IPV6;
3571         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3572                 rss_hf |= ETH_RSS_IPV6_EX;
3573         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3574                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3575         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3576                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3577         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3578                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3579         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3580                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3581         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3582                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3583         rss_conf->rss_hf = rss_hf;
3584         return 0;
3585 }
3586
3587 static void
3588 ixgbe_rss_configure(struct rte_eth_dev *dev)
3589 {
3590         struct rte_eth_rss_conf rss_conf;
3591         struct ixgbe_adapter *adapter;
3592         struct ixgbe_hw *hw;
3593         uint32_t reta;
3594         uint16_t i;
3595         uint16_t j;
3596         uint16_t sp_reta_size;
3597         uint32_t reta_reg;
3598
3599         PMD_INIT_FUNC_TRACE();
3600         adapter = dev->data->dev_private;
3601         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3602
3603         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3604
3605         /*
3606          * Fill in redirection table
3607          * The byte-swap is needed because NIC registers are in
3608          * little-endian order.
3609          */
3610         if (adapter->rss_reta_updated == 0) {
3611                 reta = 0;
3612                 for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3613                         reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3614
3615                         if (j == dev->data->nb_rx_queues)
3616                                 j = 0;
3617                         reta = (reta << 8) | j;
3618                         if ((i & 3) == 3)
3619                                 IXGBE_WRITE_REG(hw, reta_reg,
3620                                                 rte_bswap32(reta));
3621                 }
3622         }
3623
3624         /*
3625          * Configure the RSS key and the RSS protocols used to compute
3626          * the RSS hash of input packets.
3627          */
3628         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3629         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3630                 ixgbe_rss_disable(dev);
3631                 return;
3632         }
3633         if (rss_conf.rss_key == NULL)
3634                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3635         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3636 }
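
/*
 * Sketch of the device-level configuration that leads into
 * ixgbe_rss_configure() (illustrative; names and counts are placeholders):
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,        // fall back to rss_intel_key
 *                     .rss_hf = ETH_RSS_IPV4 |
 *                               ETH_RSS_NONFRAG_IPV4_TCP |
 *                               ETH_RSS_NONFRAG_IPV4_UDP,
 *             },
 *     };
 *     rte_eth_dev_configure(port_id, nb_rx_queues, nb_tx_queues, &port_conf);
 */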
3637
3638 #define NUM_VFTA_REGISTERS 128
3639 #define NIC_RX_BUFFER_SIZE 0x200
3640 #define X550_RX_BUFFER_SIZE 0x180
3641
3642 static void
3643 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3644 {
3645         struct rte_eth_vmdq_dcb_conf *cfg;
3646         struct ixgbe_hw *hw;
3647         enum rte_eth_nb_pools num_pools;
3648         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3649         uint16_t pbsize;
3650         uint8_t nb_tcs; /* number of traffic classes */
3651         int i;
3652
3653         PMD_INIT_FUNC_TRACE();
3654         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3655         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3656         num_pools = cfg->nb_queue_pools;
3657         /* Check we have a valid number of pools */
3658         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3659                 ixgbe_rss_disable(dev);
3660                 return;
3661         }
3662         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3663         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3664
3665         /*
3666          * RXPBSIZE
3667          * split rx buffer up into sections, each for 1 traffic class
3668          */
3669         switch (hw->mac.type) {
3670         case ixgbe_mac_X550:
3671         case ixgbe_mac_X550EM_x:
3672         case ixgbe_mac_X550EM_a:
3673                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3674                 break;
3675         default:
3676                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3677                 break;
3678         }
3679         for (i = 0; i < nb_tcs; i++) {
3680                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3681
3682                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT)); /* clear 10 bits */
3683                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3685                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3686         }
3687         /* zero alloc all unused TCs */
3688         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3689                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3690
3691                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT)); /* clear 10 bits */
3693                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3694         }
3695
3696         /* MRQC: enable vmdq and dcb */
3697         mrqc = (num_pools == ETH_16_POOLS) ?
3698                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3699         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3700
3701         /* PFVTCTL: turn on virtualisation and set the default pool */
3702         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3703         if (cfg->enable_default_pool) {
3704                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3705         } else {
3706                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3707         }
3708
3709         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3710
3711         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3712         queue_mapping = 0;
3713         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3714                 /*
3715                  * mapping is done with 3 bits per priority,
3716                  * so shift by i*3 each time
3717                  */
3718                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3719
3720         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3721
3722         /* RTRPCS: DCB related */
3723         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3724
3725         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3726         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3727         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3728         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3729
3730         /* VFTA - enable all vlan filters */
3731         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3732                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3733         }
3734
3735         /* VFRE: pool enabling for receive - 16 or 32 */
3736         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3737                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3738
3739         /*
3740          * MPSAR - allow pools to read specific mac addresses
3741          * In this case, all pools should be able to read from mac addr 0
3742          */
3743         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3744         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3745
3746         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3747         for (i = 0; i < cfg->nb_pool_maps; i++) {
3748                 /* set vlan id in VF register and set the valid bit */
3749                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3750                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3751                 /*
3752                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3753                  * pools, we only need to use the first half of the register
3754                  * i.e. bits 0-31
3755                  */
3756                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3757         }
3758 }
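
/*
 * Illustrative VMDq+DCB Rx configuration consumed by the function above
 * (a sketch only; pools, VLAN and priority mapping are placeholders):
 *
 *     struct rte_eth_conf port_conf = { 0 };
 *     struct rte_eth_vmdq_dcb_conf *cfg =
 *                     &port_conf.rx_adv_conf.vmdq_dcb_conf;
 *     unsigned int up;
 *
 *     port_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
 *     cfg->nb_queue_pools = ETH_16_POOLS;     // 16 pools -> 8 TCs
 *     cfg->enable_default_pool = 0;
 *     cfg->nb_pool_maps = 1;
 *     cfg->pool_map[0].vlan_id = 100;
 *     cfg->pool_map[0].pools = 1 << 0;        // VLAN 100 -> pool 0
 *     for (up = 0; up < ETH_DCB_NUM_USER_PRIORITIES; up++)
 *             cfg->dcb_tc[up] = up;           // user priority up -> TC up
 */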
3759
3760 /**
3761  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3762  * @dev: pointer to eth_dev structure
3763  * @dcb_config: pointer to ixgbe_dcb_config structure
3764  */
3765 static void
3766 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3767                        struct ixgbe_dcb_config *dcb_config)
3768 {
3769         uint32_t reg;
3770         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3771
3772         PMD_INIT_FUNC_TRACE();
3773         if (hw->mac.type != ixgbe_mac_82598EB) {
3774                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3775                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3776                 reg |= IXGBE_RTTDCS_ARBDIS;
3777                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3778
3779                 /* Enable DCB for Tx with 8 TCs */
3780                 if (dcb_config->num_tcs.pg_tcs == 8) {
3781                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3782                 } else {
3783                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3784                 }
3785                 if (dcb_config->vt_mode)
3786                         reg |= IXGBE_MTQC_VT_ENA;
3787                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3788
3789                 /* Enable the Tx desc arbiter */
3790                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3791                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3792                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3793
3794                 /* Enable Security TX Buffer IFG for DCB */
3795                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3796                 reg |= IXGBE_SECTX_DCB;
3797                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3798         }
3799 }
3800
3801 /**
3802  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3803  * @dev: pointer to rte_eth_dev structure
3804  * @dcb_config: pointer to ixgbe_dcb_config structure
3805  */
3806 static void
3807 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3808                         struct ixgbe_dcb_config *dcb_config)
3809 {
3810         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3811                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3812         struct ixgbe_hw *hw =
3813                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3814
3815         PMD_INIT_FUNC_TRACE();
3816         if (hw->mac.type != ixgbe_mac_82598EB)
3817                 /*PF VF Transmit Enable*/
3818                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3819                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3820
3821         /*Configure general DCB TX parameters*/
3822         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3823 }
3824
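/**
 * ixgbe_vmdq_dcb_rx_config - Translate VMDQ+DCB RX settings into dcb_config
 * @dev: pointer to rte_eth_dev structure
 * @dcb_config: pointer to ixgbe_dcb_config structure
 *
 * Derives the traffic class count from the configured pool count
 * (16 pools map to 8 TCs, otherwise 4 TCs) and rebuilds the RX-path
 * user-priority to traffic-class bitmap from rx_adv_conf.vmdq_dcb_conf.
 */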
3825 static void
3826 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3827                         struct ixgbe_dcb_config *dcb_config)
3828 {
3829         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3830                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3831         struct ixgbe_dcb_tc_config *tc;
3832         uint8_t i, j;
3833
3834         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3835         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3836                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3837                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3838         } else {
3839                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3840                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3841         }
3842
3843         /* Initialize User Priority to Traffic Class mapping */
3844         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3845                 tc = &dcb_config->tc_config[j];
3846                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3847         }
3848
3849         /* User Priority to Traffic Class mapping */
3850         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3851                 j = vmdq_rx_conf->dcb_tc[i];
3852                 tc = &dcb_config->tc_config[j];
3853                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3854                                                 (uint8_t)(1 << i);
3855         }
3856 }
3857
3858 static void
3859 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3860                         struct ixgbe_dcb_config *dcb_config)
3861 {
3862         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3863                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3864         struct ixgbe_dcb_tc_config *tc;
3865         uint8_t i, j;
3866
3867         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3868         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3869                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3870                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3871         } else {
3872                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3873                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3874         }
3875
3876         /* Initialize User Priority to Traffic Class mapping */
3877         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3878                 tc = &dcb_config->tc_config[j];
3879                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3880         }
3881
3882         /* User Priority to Traffic Class mapping */
3883         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3884                 j = vmdq_tx_conf->dcb_tc[i];
3885                 tc = &dcb_config->tc_config[j];
3886                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3887                                                 (uint8_t)(1 << i);
3888         }
3889 }
3890
3891 static void
3892 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3893                 struct ixgbe_dcb_config *dcb_config)
3894 {
3895         struct rte_eth_dcb_rx_conf *rx_conf =
3896                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3897         struct ixgbe_dcb_tc_config *tc;
3898         uint8_t i, j;
3899
3900         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3901         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3902
3903         /* Initialize User Priority to Traffic Class mapping */
3904         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3905                 tc = &dcb_config->tc_config[j];
3906                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3907         }
3908
3909         /* User Priority to Traffic Class mapping */
3910         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3911                 j = rx_conf->dcb_tc[i];
3912                 tc = &dcb_config->tc_config[j];
3913                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3914                                                 (uint8_t)(1 << i);
3915         }
3916 }
3917
3918 static void
3919 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3920                 struct ixgbe_dcb_config *dcb_config)
3921 {
3922         struct rte_eth_dcb_tx_conf *tx_conf =
3923                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3924         struct ixgbe_dcb_tc_config *tc;
3925         uint8_t i, j;
3926
3927         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3928         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3929
3930         /* Initialize User Priority to Traffic Class mapping */
3931         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3932                 tc = &dcb_config->tc_config[j];
3933                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3934         }
3935
3936         /* User Priority to Traffic Class mapping */
3937         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3938                 j = tx_conf->dcb_tc[i];
3939                 tc = &dcb_config->tc_config[j];
3940                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3941                                                 (uint8_t)(1 << i);
3942         }
3943 }
3944
3945 /**
3946  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3947  * @dev: pointer to eth_dev structure
3948  * @dcb_config: pointer to ixgbe_dcb_config structure
3949  */
3950 static void
3951 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3952                        struct ixgbe_dcb_config *dcb_config)
3953 {
3954         uint32_t reg;
3955         uint32_t vlanctrl;
3956         uint8_t i;
3957         uint32_t q;
3958         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3959
3960         PMD_INIT_FUNC_TRACE();
3961         /*
3962          * Disable the arbiter before changing parameters
3963          * (always enable recycle mode; WSP)
3964          */
3965         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3966         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3967
3968         if (hw->mac.type != ixgbe_mac_82598EB) {
3969                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3970                 if (dcb_config->num_tcs.pg_tcs == 4) {
3971                         if (dcb_config->vt_mode)
3972                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3973                                         IXGBE_MRQC_VMDQRT4TCEN;
3974                         else {
3975                                 /* no matter whether the mode is DCB or DCB_RSS, just
3976                                  * set the MRQE to RSSXTCEN; RSS is controlled
3977                                  * by RSS_FIELD
3978                                  */
3979                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3980                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3981                                         IXGBE_MRQC_RTRSS4TCEN;
3982                         }
3983                 }
3984                 if (dcb_config->num_tcs.pg_tcs == 8) {
3985                         if (dcb_config->vt_mode)
3986                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3987                                         IXGBE_MRQC_VMDQRT8TCEN;
3988                         else {
3989                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3990                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3991                                         IXGBE_MRQC_RTRSS8TCEN;
3992                         }
3993                 }
3994
3995                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3996
3997                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3998                         /* Disable drop for all queues in VMDQ mode*/
3999                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4000                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4001                                                 (IXGBE_QDE_WRITE |
4002                                                  (q << IXGBE_QDE_IDX_SHIFT)));
4003                 } else {
4004                         /* Enable drop for all queues in SRIOV mode */
4005                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4006                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4007                                                 (IXGBE_QDE_WRITE |
4008                                                  (q << IXGBE_QDE_IDX_SHIFT) |
4009                                                  IXGBE_QDE_ENABLE));
4010                 }
4011         }
4012
4013         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4014         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4015         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4016         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4017
4018         /* VFTA - enable all vlan filters */
4019         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4020                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4021         }
4022
4023         /*
4024          * Configure Rx packet plane (recycle mode; WSP) and
4025          * enable arbiter
4026          */
4027         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4028         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4029 }
4030
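/**
 * ixgbe_dcb_hw_arbite_rx_config - Program the RX packet-plane arbiter
 * @hw: pointer to hardware structure
 * @refill: per-TC credit refill values
 * @max: per-TC maximum credit values
 * @bwg_id: per-TC bandwidth group IDs
 * @tsa: per-TC transmission selection algorithm
 * @map: user priority to traffic class map
 *
 * Dispatches to the 82598 or 82599-class base-code arbiter routine
 * according to the MAC type; unknown MAC types are left untouched.
 */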
4031 static void
4032 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4033                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4034 {
4035         switch (hw->mac.type) {
4036         case ixgbe_mac_82598EB:
4037                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4038                 break;
4039         case ixgbe_mac_82599EB:
4040         case ixgbe_mac_X540:
4041         case ixgbe_mac_X550:
4042         case ixgbe_mac_X550EM_x:
4043         case ixgbe_mac_X550EM_a:
4044                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4045                                                   tsa, map);
4046                 break;
4047         default:
4048                 break;
4049         }
4050 }
4051
4052 static void
4053 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4054                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4055 {
4056         switch (hw->mac.type) {
4057         case ixgbe_mac_82598EB:
4058                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4059                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4060                 break;
4061         case ixgbe_mac_82599EB:
4062         case ixgbe_mac_X540:
4063         case ixgbe_mac_X550:
4064         case ixgbe_mac_X550EM_x:
4065         case ixgbe_mac_X550EM_a:
4066                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4067                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4068                 break;
4069         default:
4070                 break;
4071         }
4072 }
4073
4074 #define DCB_RX_CONFIG  1
4075 #define DCB_TX_CONFIG  1
4076 #define DCB_TX_PB      1024
4077 /**
4078  * ixgbe_dcb_hw_configure - Enable DCB and configure
4079  * general DCB in VT mode and non-VT mode parameters
4080  * @dev: pointer to rte_eth_dev structure
4081  * @dcb_config: pointer to ixgbe_dcb_config structure
4082  */
4083 static int
4084 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4085                         struct ixgbe_dcb_config *dcb_config)
4086 {
4087         int     ret = 0;
4088         uint8_t i, pfc_en, nb_tcs;
4089         uint16_t pbsize, rx_buffer_size;
4090         uint8_t config_dcb_rx = 0;
4091         uint8_t config_dcb_tx = 0;
4092         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4093         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4094         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4095         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4096         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4097         struct ixgbe_dcb_tc_config *tc;
4098         uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4099                 RTE_ETHER_CRC_LEN;
4100         struct ixgbe_hw *hw =
4101                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4102         struct ixgbe_bw_conf *bw_conf =
4103                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4104
4105         switch (dev->data->dev_conf.rxmode.mq_mode) {
4106         case ETH_MQ_RX_VMDQ_DCB:
4107                 dcb_config->vt_mode = true;
4108                 if (hw->mac.type != ixgbe_mac_82598EB) {
4109                         config_dcb_rx = DCB_RX_CONFIG;
4110                         /*
4111                          *get dcb and VT rx configuration parameters
4112                          *from rte_eth_conf
4113                          */
4114                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4115                         /*Configure general VMDQ and DCB RX parameters*/
4116                         ixgbe_vmdq_dcb_configure(dev);
4117                 }
4118                 break;
4119         case ETH_MQ_RX_DCB:
4120         case ETH_MQ_RX_DCB_RSS:
4121                 dcb_config->vt_mode = false;
4122                 config_dcb_rx = DCB_RX_CONFIG;
4123                 /* Get dcb RX configuration parameters from rte_eth_conf */
4124                 ixgbe_dcb_rx_config(dev, dcb_config);
4125                 /*Configure general DCB RX parameters*/
4126                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
4127                 break;
4128         default:
4129                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4130                 break;
4131         }
4132         switch (dev->data->dev_conf.txmode.mq_mode) {
4133         case ETH_MQ_TX_VMDQ_DCB:
4134                 dcb_config->vt_mode = true;
4135                 config_dcb_tx = DCB_TX_CONFIG;
4136                 /* get DCB and VT TX configuration parameters
4137                  * from rte_eth_conf
4138                  */
4139                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
4140                 /*Configure general VMDQ and DCB TX parameters*/
4141                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4142                 break;
4143
4144         case ETH_MQ_TX_DCB:
4145                 dcb_config->vt_mode = false;
4146                 config_dcb_tx = DCB_TX_CONFIG;
4147                 /*get DCB TX configuration parameters from rte_eth_conf*/
4148                 ixgbe_dcb_tx_config(dev, dcb_config);
4149                 /*Configure general DCB TX parameters*/
4150                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
4151                 break;
4152         default:
4153                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4154                 break;
4155         }
4156
4157         nb_tcs = dcb_config->num_tcs.pfc_tcs;
4158         /* Unpack map */
4159         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4160         if (nb_tcs == ETH_4_TCS) {
4161                 /* Avoid un-configured priority mapping to TC0 */
4162                 uint8_t j = 4;
4163                 uint8_t mask = 0xFF;
4164
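                /*
                 * mask starts with all eight TCs set; the first loop below
                 * clears the TCs already taken by user priorities 0-3, and
                 * the second loop assigns user priorities 4-7 to the
                 * remaining TCs so that none of them silently falls back
                 * to TC0.
                 */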
4165                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4166                         mask = (uint8_t)(mask & (~(1 << map[i])));
4167                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4168                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4169                                 map[j++] = i;
4170                         mask >>= 1;
4171                 }
4172                 /* Re-configure 4 TCs BW */
4173                 for (i = 0; i < nb_tcs; i++) {
4174                         tc = &dcb_config->tc_config[i];
4175                         if (bw_conf->tc_num != nb_tcs)
4176                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4177                                         (uint8_t)(100 / nb_tcs);
4178                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4179                                                 (uint8_t)(100 / nb_tcs);
4180                 }
4181                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4182                         tc = &dcb_config->tc_config[i];
4183                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4184                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4185                 }
4186         } else {
4187                 /* Re-configure 8 TCs BW */
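                /*
                 * With 8 TCs, 100 / nb_tcs is 12 and (i & 1) adds one to
                 * each odd-numbered TC, giving a 12/13/12/13/... split that
                 * sums to exactly 100 percent.
                 */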
4188                 for (i = 0; i < nb_tcs; i++) {
4189                         tc = &dcb_config->tc_config[i];
4190                         if (bw_conf->tc_num != nb_tcs)
4191                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4192                                         (uint8_t)(100 / nb_tcs + (i & 1));
4193                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4194                                 (uint8_t)(100 / nb_tcs + (i & 1));
4195                 }
4196         }
4197
4198         switch (hw->mac.type) {
4199         case ixgbe_mac_X550:
4200         case ixgbe_mac_X550EM_x:
4201         case ixgbe_mac_X550EM_a:
4202                 rx_buffer_size = X550_RX_BUFFER_SIZE;
4203                 break;
4204         default:
4205                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
4206                 break;
4207         }
4208
4209         if (config_dcb_rx) {
4210                 /* Set RX buffer size */
4211                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4212                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4213
4214                 for (i = 0; i < nb_tcs; i++) {
4215                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4216                 }
4217                 /* zero alloc all unused TCs */
4218                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4219                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4220                 }
4221         }
4222         if (config_dcb_tx) {
4223                 /* Only an equally distributed Tx packet buffer
4224                  * strategy is supported.
4225                  */
4226                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4227                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4228
4229                 for (i = 0; i < nb_tcs; i++) {
4230                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4231                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4232                 }
4233                 /* Clear unused TCs, if any, to zero buffer size*/
4234                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4235                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4236                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4237                 }
4238         }
4239
4240         /*Calculates traffic class credits*/
4241         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4242                                 IXGBE_DCB_TX_CONFIG);
4243         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4244                                 IXGBE_DCB_RX_CONFIG);
4245
4246         if (config_dcb_rx) {
4247                 /* Unpack CEE standard containers */
4248                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4249                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4250                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4251                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4252                 /* Configure PG(ETS) RX */
4253                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4254         }
4255
4256         if (config_dcb_tx) {
4257                 /* Unpack CEE standard containers */
4258                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4259                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
4260                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4261                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4262                 /* Configure PG(ETS) TX */
4263                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4264         }
4265
4266         /*Configure queue statistics registers*/
4267         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4268
4269         /* Configure PFC if it was requested */
4270         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4271                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4272                 for (i = 0; i < nb_tcs; i++) {
4273                         /*
4274                         * If the TC count is 8 and the default high_water is 48,
4275                         * the low_water defaults to 16.
4276                         */
4277                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4278                         hw->fc.low_water[i] = pbsize / 4;
4279                         /* Enable pfc for this TC */
4280                         tc = &dcb_config->tc_config[i];
4281                         tc->pfc = ixgbe_dcb_pfc_enabled;
4282                 }
4283                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4284                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4285                         pfc_en &= 0x0F;
4286                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4287         }
4288
4289         return ret;
4290 }
4291
4292 /**
4293  * ixgbe_configure_dcb - Configure DCB  Hardware
4294  * @dev: pointer to rte_eth_dev
4295  */
4296 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4297 {
4298         struct ixgbe_dcb_config *dcb_cfg =
4299                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4300         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4301
4302         PMD_INIT_FUNC_TRACE();
4303
4304         /* check support mq_mode for DCB */
4305         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4306             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4307             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4308                 return;
4309
4310         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4311                 return;
4312
4313         /** Configure DCB hardware **/
4314         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4315 }
4316
4317 /*
4318  * VMDq is only supported on 10 GbE NICs.
4319  */
4320 static void
4321 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4322 {
4323         struct rte_eth_vmdq_rx_conf *cfg;
4324         struct ixgbe_hw *hw;
4325         enum rte_eth_nb_pools num_pools;
4326         uint32_t mrqc, vt_ctl, vlanctrl;
4327         uint32_t vmolr = 0;
4328         int i;
4329
4330         PMD_INIT_FUNC_TRACE();
4331         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4332         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4333         num_pools = cfg->nb_queue_pools;
4334
4335         ixgbe_rss_disable(dev);
4336
4337         /* MRQC: enable vmdq */
4338         mrqc = IXGBE_MRQC_VMDQEN;
4339         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4340
4341         /* PFVTCTL: turn on virtualisation and set the default pool */
4342         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4343         if (cfg->enable_default_pool)
4344                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4345         else
4346                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4347
4348         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4349
4350         for (i = 0; i < (int)num_pools; i++) {
4351                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4352                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4353         }
4354
4355         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4356         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4357         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4358         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4359
4360         /* VFTA - enable all vlan filters */
4361         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4362                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4363
4364         /* VFRE: pool enabling for receive - 64 */
4365         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4366         if (num_pools == ETH_64_POOLS)
4367                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4368
4369         /*
4370          * MPSAR - allow pools to read specific mac addresses
4371          * In this case, all pools should be able to read from mac addr 0
4372          */
4373         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4374         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4375
4376         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4377         for (i = 0; i < cfg->nb_pool_maps; i++) {
4378                 /* set vlan id in VF register and set the valid bit */
4379                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4380                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4381                 /*
4382                  * Put the allowed pools in the VLVFB regs. Each filter has two
4383                  * 32-bit pool-enable registers; write whichever half of the
4384                  * 64-bit pool bitmap is actually populated.
4385                  */
4386                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4387                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4388                                         (cfg->pool_map[i].pools & UINT32_MAX));
4389                 else
4390                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4391                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4392
4393         }
4394
4395         /* PFDMA Tx General Switch Control: enable VMDQ loopback */
4396         if (cfg->enable_loop_back) {
4397                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4398                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4399                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4400         }
4401
4402         IXGBE_WRITE_FLUSH(hw);
4403 }
4404
4405 /*
4406  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4407  * @hw: pointer to hardware structure
4408  */
4409 static void
4410 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4411 {
4412         uint32_t reg;
4413         uint32_t q;
4414
4415         PMD_INIT_FUNC_TRACE();
4416         /*PF VF Transmit Enable*/
4417         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4418         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4419
4420         /* Disable the Tx desc arbiter so that MTQC can be changed */
4421         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4422         reg |= IXGBE_RTTDCS_ARBDIS;
4423         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4424
4425         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4426         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4427
4428         /* Disable drop for all queues */
4429         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4430                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4431                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4432
4433         /* Enable the Tx desc arbiter */
4434         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4435         reg &= ~IXGBE_RTTDCS_ARBDIS;
4436         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4437
4438         IXGBE_WRITE_FLUSH(hw);
4439 }
4440
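/**
 * ixgbe_alloc_rx_queue_mbufs - Seed an RX ring with freshly allocated mbufs
 * @rxq: pointer to the RX queue structure
 *
 * Allocates one mbuf per descriptor from the queue's mempool, resets its
 * data offset and port, and programs its IOVA into the descriptor packet
 * address field. Returns 0 on success or -ENOMEM if the pool is exhausted.
 */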
4441 static int __rte_cold
4442 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4443 {
4444         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4445         uint64_t dma_addr;
4446         unsigned int i;
4447
4448         /* Initialize software ring entries */
4449         for (i = 0; i < rxq->nb_rx_desc; i++) {
4450                 volatile union ixgbe_adv_rx_desc *rxd;
4451                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4452
4453                 if (mbuf == NULL) {
4454                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4455                                      (unsigned) rxq->queue_id);
4456                         return -ENOMEM;
4457                 }
4458
4459                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4460                 mbuf->port = rxq->port_id;
4461
4462                 dma_addr =
4463                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4464                 rxd = &rxq->rx_ring[i];
4465                 rxd->read.hdr_addr = 0;
4466                 rxd->read.pkt_addr = dma_addr;
4467                 rxe[i].mbuf = mbuf;
4468         }
4469
4470         return 0;
4471 }
4472
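/**
 * ixgbe_config_vf_rss - Enable RSS alongside SR-IOV pools
 * @dev: pointer to rte_eth_dev structure
 *
 * Programs the RSS hash configuration and then selects the VMDq+RSS MRQC
 * mode matching the number of active pools (64 or 32); any other pool
 * count is rejected with -EINVAL.
 */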
4473 static int
4474 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4475 {
4476         struct ixgbe_hw *hw;
4477         uint32_t mrqc;
4478
4479         ixgbe_rss_configure(dev);
4480
4481         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4482
4483         /* MRQC: enable VF RSS */
4484         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4485         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4486         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4487         case ETH_64_POOLS:
4488                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4489                 break;
4490
4491         case ETH_32_POOLS:
4492                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4493                 break;
4494
4495         default:
4496                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4497                 return -EINVAL;
4498         }
4499
4500         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4501
4502         return 0;
4503 }
4504
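/**
 * ixgbe_config_vf_default - Default multi-queue RX mode for SR-IOV
 * @dev: pointer to rte_eth_dev structure
 *
 * Writes MRQC according to the number of active pools when no explicit
 * RSS/DCB scheme applies: VMDQEN for 64 pools, VMDQRT4TCEN for 32 and
 * VMDQRT8TCEN for 16.
 */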
4505 static int
4506 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4507 {
4508         struct ixgbe_hw *hw =
4509                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4510
4511         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4512         case ETH_64_POOLS:
4513                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4514                         IXGBE_MRQC_VMDQEN);
4515                 break;
4516
4517         case ETH_32_POOLS:
4518                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4519                         IXGBE_MRQC_VMDQRT4TCEN);
4520                 break;
4521
4522         case ETH_16_POOLS:
4523                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4524                         IXGBE_MRQC_VMDQRT8TCEN);
4525                 break;
4526         default:
4527                 PMD_INIT_LOG(ERR,
4528                         "invalid pool number in IOV mode");
4529                 break;
4530         }
4531         return 0;
4532 }
4533
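/**
 * ixgbe_dev_mq_rx_configure - Program the RX multi-queue mode
 * @dev: pointer to rte_eth_dev structure
 *
 * Nothing is done for 82598. Without SR-IOV the requested rxmode.mq_mode
 * selects plain RSS, VMDq+DCB or pure VMDq; with SR-IOV active the VF
 * variants are used instead and DCB combined with RSS is rejected.
 */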
4534 static int
4535 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4536 {
4537         struct ixgbe_hw *hw =
4538                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4539
4540         if (hw->mac.type == ixgbe_mac_82598EB)
4541                 return 0;
4542
4543         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4544                 /*
4545                  * SRIOV inactive scheme
4546                  * any DCB/RSS w/o VMDq multi-queue setting
4547                  */
4548                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4549                 case ETH_MQ_RX_RSS:
4550                 case ETH_MQ_RX_DCB_RSS:
4551                 case ETH_MQ_RX_VMDQ_RSS:
4552                         ixgbe_rss_configure(dev);
4553                         break;
4554
4555                 case ETH_MQ_RX_VMDQ_DCB:
4556                         ixgbe_vmdq_dcb_configure(dev);
4557                         break;
4558
4559                 case ETH_MQ_RX_VMDQ_ONLY:
4560                         ixgbe_vmdq_rx_hw_configure(dev);
4561                         break;
4562
4563                 case ETH_MQ_RX_NONE:
4564                 default:
4565                         /* if mq_mode is none, disable rss mode.*/
4566                         ixgbe_rss_disable(dev);
4567                         break;
4568                 }
4569         } else {
4570                 /* SRIOV active scheme
4571                  * Support RSS together with SRIOV.
4572                  */
4573                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4574                 case ETH_MQ_RX_RSS:
4575                 case ETH_MQ_RX_VMDQ_RSS:
4576                         ixgbe_config_vf_rss(dev);
4577                         break;
4578                 case ETH_MQ_RX_VMDQ_DCB:
4579                 case ETH_MQ_RX_DCB:
4580                 /* In SRIOV, the configuration is the same as VMDq case */
4581                         ixgbe_vmdq_dcb_configure(dev);
4582                         break;
4583                 /* DCB/RSS together with SRIOV is not supported */
4584                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4585                 case ETH_MQ_RX_DCB_RSS:
4586                         PMD_INIT_LOG(ERR,
4587                                 "Could not support DCB/RSS with VMDq & SRIOV");
4588                         return -1;
4589                 default:
4590                         ixgbe_config_vf_default(dev);
4591                         break;
4592                 }
4593         }
4594
4595         return 0;
4596 }
4597
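/**
 * ixgbe_dev_mq_tx_configure - Program the TX multi-queue mode (MTQC)
 * @dev: pointer to rte_eth_dev structure
 *
 * Disables the TX descriptor arbiter, writes MTQC either for the
 * non-SR-IOV case (VMDq-only or a single 64-queue packet buffer) or
 * according to the SR-IOV pool count, then re-enables the arbiter.
 */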
4598 static int
4599 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4600 {
4601         struct ixgbe_hw *hw =
4602                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4603         uint32_t mtqc;
4604         uint32_t rttdcs;
4605
4606         if (hw->mac.type == ixgbe_mac_82598EB)
4607                 return 0;
4608
4609         /* disable arbiter before setting MTQC */
4610         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4611         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4612         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4613
4614         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4615                 /*
4616                  * SRIOV inactive scheme
4617                  * any DCB w/o VMDq multi-queue setting
4618                  */
4619                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4620                         ixgbe_vmdq_tx_hw_configure(hw);
4621                 else {
4622                         mtqc = IXGBE_MTQC_64Q_1PB;
4623                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4624                 }
4625         } else {
4626                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4627
4628                 /*
4629                  * SRIOV active scheme
4630                  * FIXME: revisit if DCB together with VMDq & SRIOV is supported
4631                  */
4632                 case ETH_64_POOLS:
4633                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4634                         break;
4635                 case ETH_32_POOLS:
4636                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4637                         break;
4638                 case ETH_16_POOLS:
4639                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4640                                 IXGBE_MTQC_8TC_8TQ;
4641                         break;
4642                 default:
4643                         mtqc = IXGBE_MTQC_64Q_1PB;
4644                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4645                 }
4646                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4647         }
4648
4649         /* re-enable arbiter */
4650         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4651         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4652
4653         return 0;
4654 }
4655
4656 /**
4657  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4658  *
4659  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4660  * spec rev. 3.0 chapter 8.2.3.8.13.
4661  *
4662  * @pool Memory pool of the Rx queue
4663  */
4664 static inline uint32_t
4665 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4666 {
4667         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4668
4669         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4670         uint16_t maxdesc =
4671                 RTE_IPV4_MAX_PKT_LEN /
4672                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4673
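        /*
         * For example, a pool whose mbufs leave 2048 bytes of data room
         * after the headroom gives maxdesc = 65535 / 2048 = 31, so
         * MAXDESC_16 is selected and 16 * 2 KB stays below the 64 KB limit.
         */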
4674         if (maxdesc >= 16)
4675                 return IXGBE_RSCCTL_MAXDESC_16;
4676         else if (maxdesc >= 8)
4677                 return IXGBE_RSCCTL_MAXDESC_8;
4678         else if (maxdesc >= 4)
4679                 return IXGBE_RSCCTL_MAXDESC_4;
4680         else
4681                 return IXGBE_RSCCTL_MAXDESC_1;
4682 }
4683
4684 /**
4685  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4686  * interrupt
4687  *
4688  * (Taken from FreeBSD tree)
4689  * (yes this is all very magic and confusing :)
4690  *
4691  * @dev port handle
4692  * @entry the register array entry
4693  * @vector the MSIX vector for this queue
4694  * @type RX/TX/MISC
4695  */
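/*
 * Worked example of the 82599/X540 index math below: each IVAR register
 * covers two queue entries, with RX causes in bytes 0 and 2 and TX causes
 * in bytes 1 and 3, so RX queue 5 (entry 5, type 0) lands in byte 2 of
 * IVAR(2).
 */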
4696 static void
4697 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4698 {
4699         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4700         u32 ivar, index;
4701
4702         vector |= IXGBE_IVAR_ALLOC_VAL;
4703
4704         switch (hw->mac.type) {
4705
4706         case ixgbe_mac_82598EB:
4707                 if (type == -1)
4708                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4709                 else
4710                         entry += (type * 64);
4711                 index = (entry >> 2) & 0x1F;
4712                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4713                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4714                 ivar |= (vector << (8 * (entry & 0x3)));
4715                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4716                 break;
4717
4718         case ixgbe_mac_82599EB:
4719         case ixgbe_mac_X540:
4720                 if (type == -1) { /* MISC IVAR */
4721                         index = (entry & 1) * 8;
4722                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4723                         ivar &= ~(0xFF << index);
4724                         ivar |= (vector << index);
4725                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4726                 } else {        /* RX/TX IVARS */
4727                         index = (16 * (entry & 1)) + (8 * type);
4728                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4729                         ivar &= ~(0xFF << index);
4730                         ivar |= (vector << index);
4731                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4732                 }
4733
4734                 break;
4735
4736         default:
4737                 break;
4738         }
4739 }
4740
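/**
 * ixgbe_set_rx_function - Select the RX burst callback for a port
 * @dev: pointer to rte_eth_dev structure
 *
 * Chooses between the LRO, scattered, vector, bulk-allocation and basic
 * receive paths according to the offload configuration and whether the
 * vector/bulk-allocation preconditions were met, then records the choice
 * in every RX queue.
 */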
4741 void __rte_cold
4742 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4743 {
4744         uint16_t i, rx_using_sse;
4745         struct ixgbe_adapter *adapter = dev->data->dev_private;
4746
4747         /*
4748          * In order to allow Vector Rx there are a few configuration
4749          * conditions to be met and Rx Bulk Allocation should be allowed.
4750          */
4751         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4752             !adapter->rx_bulk_alloc_allowed ||
4753                         rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4754                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4755                                     "preconditions",
4756                              dev->data->port_id);
4757
4758                 adapter->rx_vec_allowed = false;
4759         }
4760
4761         /*
4762          * Initialize the appropriate LRO callback.
4763          *
4764          * If all queues satisfy the bulk allocation preconditions
4765          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4766          * Otherwise use a single allocation version.
4767          */
4768         if (dev->data->lro) {
4769                 if (adapter->rx_bulk_alloc_allowed) {
4770                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4771                                            "allocation version");
4772                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4773                 } else {
4774                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4775                                            "allocation version");
4776                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4777                 }
4778         } else if (dev->data->scattered_rx) {
4779                 /*
4780                  * Set the non-LRO scattered callback: there are Vector and
4781                  * single allocation versions.
4782                  */
4783                 if (adapter->rx_vec_allowed) {
4784                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4785                                             "callback (port=%d).",
4786                                      dev->data->port_id);
4787
4788                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4789                 } else if (adapter->rx_bulk_alloc_allowed) {
4790                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4791                                            "allocation callback (port=%d).",
4792                                      dev->data->port_id);
4793                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4794                 } else {
4795                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4796                                             "single allocation) "
4797                                             "Scattered Rx callback "
4798                                             "(port=%d).",
4799                                      dev->data->port_id);
4800
4801                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4802                 }
4803         /*
4804          * Below we set "simple" callbacks according to port/queue parameters.
4805          * If the parameters allow it, we choose between the following
4806          * callbacks:
4807          *    - Vector
4808          *    - Bulk Allocation
4809          *    - Single buffer allocation (the simplest one)
4810          */
4811         } else if (adapter->rx_vec_allowed) {
4812                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure the RX "
4813                                     "burst size is no less than %d (port=%d).",
4814                              RTE_IXGBE_DESCS_PER_LOOP,
4815                              dev->data->port_id);
4816
4817                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4818         } else if (adapter->rx_bulk_alloc_allowed) {
4819                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4820                                     "satisfied. Rx Burst Bulk Alloc function "
4821                                     "will be used on port=%d.",
4822                              dev->data->port_id);
4823
4824                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4825         } else {
4826                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4827                                     "satisfied, or Scattered Rx is requested "
4828                                     "(port=%d).",
4829                              dev->data->port_id);
4830
4831                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4832         }
4833
4834         /* Propagate information about RX function choice through all queues. */
4835
4836         rx_using_sse =
4837                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4838                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4839
4840         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4841                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4842
4843                 rxq->rx_using_sse = rx_using_sse;
4844 #ifdef RTE_LIB_SECURITY
4845                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4846                                 DEV_RX_OFFLOAD_SECURITY);
4847 #endif
4848         }
4849 }
4850
4851 /**
4852  * ixgbe_set_rsc - configure RSC related port HW registers
4853  *
4854  * Configures the port's RSC-related registers according to chapter 4.6.7.2
4855  * of the 82599 Spec (x540 configuration is virtually the same).
4856  *
4857  * @dev port handle
4858  *
4859  * Returns 0 in case of success or a non-zero error code
4860  */
4861 static int
4862 ixgbe_set_rsc(struct rte_eth_dev *dev)
4863 {
4864         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4865         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4866         struct rte_eth_dev_info dev_info = { 0 };
4867         bool rsc_capable = false;
4868         uint16_t i;
4869         uint32_t rdrxctl;
4870         uint32_t rfctl;
4871
4872         /* Sanity check */
4873         dev->dev_ops->dev_infos_get(dev, &dev_info);
4874         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4875                 rsc_capable = true;
4876
4877         if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4878                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4879                                    "support it");
4880                 return -EINVAL;
4881         }
4882
4883         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4884
4885         if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4886              (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4887                 /*
4888                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4889                  * 3.0 RSC configuration requires HW CRC stripping being
4890                  * enabled. If user requested both HW CRC stripping off
4891                  * and RSC on - return an error.
4892                  */
4893                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4894                                     "is disabled");
4895                 return -EINVAL;
4896         }
4897
4898         /* RFCTL configuration  */
4899         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4900         if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4901                 /*
4902                  * Since NFS packets coalescing is not supported - clear
4903                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4904                  * enabled.
4905                  */
4906                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4907                            IXGBE_RFCTL_NFSR_DIS);
4908         else
4909                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4910         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4911
4912         /* If LRO hasn't been requested - we are done here. */
4913         if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4914                 return 0;
4915
4916         /* Set RDRXCTL.RSCACKC bit */
4917         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4918         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4919         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4920
4921         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4922         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4923                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4924                 uint32_t srrctl =
4925                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4926                 uint32_t rscctl =
4927                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4928                 uint32_t psrtype =
4929                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4930                 uint32_t eitr =
4931                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4932
4933                 /*
4934                  * ixgbe PMD doesn't support header-split at the moment.
4935                  *
4936                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4937                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4938                  * should be configured even if header split is not
4939                  * enabled. We will configure it to 128 bytes following the
4940                  * recommendation in the spec.
4941                  */
4942                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4943                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4944                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4945
4946                 /*
4947                  * TODO: Consider setting the Receive Descriptor Minimum
4948                  * Threshold Size for an RSC case. This is not an obviously
4949                  * beneficial option, but one worth considering...
4950                  */
4951
4952                 rscctl |= IXGBE_RSCCTL_RSCEN;
4953                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4954                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4955
4956                 /*
4957                  * RSC: Set ITR interval corresponding to 2K ints/s.
4958                  *
4959                  * Full-sized RSC aggregations for a 10Gb/s link will
4960                  * arrive at a rate of about 20K aggregations/s.
4961                  *
4962                  * A 2K ints/s rate will cause only 10% of the
4963                  * aggregations to be closed due to interrupt timer
4964                  * expiration when streaming at wire speed.
4965                  *
4966                  * For a sparse streaming case this setting will yield
4967                  * at most 500us latency for a single RSC aggregation.
4968                  */
4969                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4970                 eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4971                 eitr |= IXGBE_EITR_CNT_WDIS;
4972
4973                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4974                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4975                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4976                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4977
4978                 /*
4979                  * RSC requires the mapping of the queue to the
4980                  * interrupt vector.
4981                  */
4982                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4983         }
4984
4985         dev->data->lro = 1;
4986
4987         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4988
4989         return 0;
4990 }
4991
4992 /*
4993  * Initializes Receive Unit.
4994  */
4995 int __rte_cold
4996 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4997 {
4998         struct ixgbe_hw     *hw;
4999         struct ixgbe_rx_queue *rxq;
5000         uint64_t bus_addr;
5001         uint32_t rxctrl;
5002         uint32_t fctrl;
5003         uint32_t hlreg0;
5004         uint32_t maxfrs;
5005         uint32_t srrctl;
5006         uint32_t rdrxctl;
5007         uint32_t rxcsum;
5008         uint16_t buf_size;
5009         uint16_t i;
5010         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5011         int rc;
5012
5013         PMD_INIT_FUNC_TRACE();
5014         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5015
5016         /*
5017          * Make sure receives are disabled while setting
5018          * up the RX context (registers, descriptor rings, etc.).
5019          */
5020         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5021         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5022
5023         /* Enable receipt of broadcast frames */
5024         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5025         fctrl |= IXGBE_FCTRL_BAM;
5026         fctrl |= IXGBE_FCTRL_DPF;
5027         fctrl |= IXGBE_FCTRL_PMCF;
5028         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5029
5030         /*
5031          * Configure CRC stripping, if any.
5032          */
5033         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5034         if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5035                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5036         else
5037                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5038
5039         /*
5040          * Configure jumbo frame support, if any.
5041          */
5042         if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5043                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5044                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5045                 maxfrs &= 0x0000FFFF;
5046                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5047                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5048         } else
5049                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5050
5051         /*
5052          * If loopback mode is configured, set LPBK bit.
5053          */
5054         if (dev->data->dev_conf.lpbk_mode != 0) {
5055                 rc = ixgbe_check_supported_loopback_mode(dev);
5056                 if (rc < 0) {
5057                         PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5058                         return rc;
5059                 }
5060                 hlreg0 |= IXGBE_HLREG0_LPBK;
5061         } else {
5062                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
5063         }
5064
5065         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5066
5067         /*
5068          * Assume no header split and no VLAN strip support
5069          * on any Rx queue first.
5070          */
5071         rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5072         /* Setup RX queues */
5073         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5074                 rxq = dev->data->rx_queues[i];
5075
5076                 /*
5077                  * Reset crc_len in case it was changed after queue setup by a
5078                  * call to configure.
5079                  */
5080                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5081                         rxq->crc_len = RTE_ETHER_CRC_LEN;
5082                 else
5083                         rxq->crc_len = 0;
5084
5085                 /* Setup the Base and Length of the Rx Descriptor Rings */
5086                 bus_addr = rxq->rx_ring_phys_addr;
5087                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5088                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5089                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5090                                 (uint32_t)(bus_addr >> 32));
5091                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5092                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5093                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5094                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5095
5096                 /* Configure the SRRCTL register */
5097                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5098
5099                 /* Set if packets are dropped when no descriptors available */
5100                 if (rxq->drop_en)
5101                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5102
5103                 /*
5104                  * Configure the RX buffer size in the BSIZEPACKET field of
5105                  * the SRRCTL register of the queue.
5106                  * The value is in 1 KB resolution. Valid values can be from
5107                  * 1 KB to 16 KB.
5108                  */
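                /*
                 * For example, a 2176-byte mbuf data room minus the
                 * 128-byte headroom leaves 2048 bytes, and 2048 >> 10
                 * yields a BSIZEPACKET value of 2, i.e. a 2 KB buffer.
                 */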
5109                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5110                         RTE_PKTMBUF_HEADROOM);
5111                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5112                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5113
5114                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5115
5116                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5117                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5118
5119                 /* Add the dual VLAN tag length to support dual VLAN frames */
5120                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5121                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5122                         dev->data->scattered_rx = 1;
5123                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5124                         rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5125         }
5126
5127         if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5128                 dev->data->scattered_rx = 1;
5129
5130         /*
5131          * Device configured with multiple RX queues.
5132          */
5133         ixgbe_dev_mq_rx_configure(dev);
5134
5135         /*
5136          * Set up the Checksum Register.
5137          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
5138          * Enable IP/L4 checksum computation by hardware if requested to do so.
5139          */
5140         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5141         rxcsum |= IXGBE_RXCSUM_PCSD;
5142         if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5143                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
5144         else
5145                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5146
5147         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5148
5149         if (hw->mac.type == ixgbe_mac_82599EB ||
5150             hw->mac.type == ixgbe_mac_X540) {
5151                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5152                 if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5153                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5154                 else
5155                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5156                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5157                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5158         }
5159
5160         rc = ixgbe_set_rsc(dev);
5161         if (rc)
5162                 return rc;
5163
5164         ixgbe_set_rx_function(dev);
5165
5166         return 0;
5167 }
5168
5169 /*
5170  * Initializes Transmit Unit.
5171  */
5172 void __rte_cold
5173 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5174 {
5175         struct ixgbe_hw     *hw;
5176         struct ixgbe_tx_queue *txq;
5177         uint64_t bus_addr;
5178         uint32_t hlreg0;
5179         uint32_t txctrl;
5180         uint16_t i;
5181
5182         PMD_INIT_FUNC_TRACE();
5183         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5184
5185         /* Enable TX CRC (checksum offload requirement) and hw padding
5186          * (TSO requirement)
5187          */
5188         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5189         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5190         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5191
5192         /* Setup the Base and Length of the Tx Descriptor Rings */
5193         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5194                 txq = dev->data->tx_queues[i];
5195
5196                 bus_addr = txq->tx_ring_phys_addr;
5197                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5198                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5199                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5200                                 (uint32_t)(bus_addr >> 32));
5201                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5202                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5203                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5204                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5205                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5206
5207                 /*
5208                  * Disable the Tx Head Writeback relaxed ordering (RO) bit;
5209                  * out-of-order write-backs would corrupt descriptor bookkeeping.
5210                  */
5211                 switch (hw->mac.type) {
5212                 case ixgbe_mac_82598EB:
5213                         txctrl = IXGBE_READ_REG(hw,
5214                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
5215                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5216                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5217                                         txctrl);
5218                         break;
5219
5220                 case ixgbe_mac_82599EB:
5221                 case ixgbe_mac_X540:
5222                 case ixgbe_mac_X550:
5223                 case ixgbe_mac_X550EM_x:
5224                 case ixgbe_mac_X550EM_a:
5225                 default:
5226                         txctrl = IXGBE_READ_REG(hw,
5227                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5228                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5229                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5230                                         txctrl);
5231                         break;
5232                 }
5233         }
5234
5235         /* Device configured with multiple TX queues. */
5236         ixgbe_dev_mq_tx_configure(dev);
5237 }
5238
5239 /*
5240  * Check if requested loopback mode is supported
5241  */
5242 int
5243 ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5244 {
5245         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5246
5247         if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5248                 if (hw->mac.type == ixgbe_mac_82599EB ||
5249                      hw->mac.type == ixgbe_mac_X540 ||
5250                      hw->mac.type == ixgbe_mac_X550 ||
5251                      hw->mac.type == ixgbe_mac_X550EM_x ||
5252                      hw->mac.type == ixgbe_mac_X550EM_a)
5253                         return 0;
5254
5255         return -ENOTSUP;
5256 }
5257
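/*
 * Illustrative sketch, not driver code: an application selects Tx->Rx
 * loopback before configuring the port, and the check above validates the
 * MAC type during Rx init. "port_id", "nb_rxq" and "nb_txq" are assumed
 * placeholders; IXGBE_LPBK_TX_RX is the single loopback mode this driver
 * accepts.
 *
 *     struct rte_eth_conf conf = { 0 };
 *
 *     conf.lpbk_mode = IXGBE_LPBK_TX_RX;
 *     if (rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf) == 0)
 *             rte_eth_dev_start(port_id);
 *
 * On 82599 parts, ixgbe_dev_rxtx_start() then calls
 * ixgbe_setup_loopback_link_82599() below to force the link up.
 */
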
5258 /*
5259  * Set up link for 82599 loopback mode Tx->Rx.
5260  */
5261 static inline void __rte_cold
5262 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5263 {
5264         PMD_INIT_FUNC_TRACE();
5265
5266         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5267                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5268                                 IXGBE_SUCCESS) {
5269                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5270                         /* ignore error */
5271                         return;
5272                 }
5273         }
5274
5275         /* Restart link */
5276         IXGBE_WRITE_REG(hw,
5277                         IXGBE_AUTOC,
5278                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5279         ixgbe_reset_pipeline_82599(hw);
5280
5281         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5282         msec_delay(50);
5283 }
5284
5286 /*
5287  * Start Transmit and Receive Units.
5288  */
5289 int __rte_cold
5290 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5291 {
5292         struct ixgbe_hw     *hw;
5293         struct ixgbe_tx_queue *txq;
5294         struct ixgbe_rx_queue *rxq;
5295         uint32_t txdctl;
5296         uint32_t dmatxctl;
5297         uint32_t rxctrl;
5298         uint16_t i;
5299         int ret = 0;
5300
5301         PMD_INIT_FUNC_TRACE();
5302         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5303
5304         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5305                 txq = dev->data->tx_queues[i];
5306                 /* Setup Transmit Threshold Registers */
5307                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5308                 txdctl |= txq->pthresh & 0x7F;
5309                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5310                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5311                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5312         }
5313
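        /*
         * Worked example (threshold values assumed): a queue configured with
         * tx_thresh = { .pthresh = 32, .hthresh = 0, .wthresh = 0 } ORs
         *
         *     (32 & 0x7F) | ((0 & 0x7F) << 8) | ((0 & 0x7F) << 16) = 0x20
         *
         * into the TXDCTL value read above, i.e. PTHRESH lands in bits 6:0,
         * HTHRESH in bits 14:8 and WTHRESH in bits 22:16.
         */
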
5314         if (hw->mac.type != ixgbe_mac_82598EB) {
5315                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5316                 dmatxctl |= IXGBE_DMATXCTL_TE;
5317                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5318         }
5319
5320         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5321                 txq = dev->data->tx_queues[i];
5322                 if (!txq->tx_deferred_start) {
5323                         ret = ixgbe_dev_tx_queue_start(dev, i);
5324                         if (ret < 0)
5325                                 return ret;
5326                 }
5327         }
5328
5329         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5330                 rxq = dev->data->rx_queues[i];
5331                 if (!rxq->rx_deferred_start) {
5332                         ret = ixgbe_dev_rx_queue_start(dev, i);
5333                         if (ret < 0)
5334                                 return ret;
5335                 }
5336         }
5337
5338         /* Enable Receive engine */
5339         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5340         if (hw->mac.type == ixgbe_mac_82598EB)
5341                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5342         rxctrl |= IXGBE_RXCTRL_RXEN;
5343         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5344
5345         /* If loopback mode is enabled, set up the link accordingly */
5346         if (dev->data->dev_conf.lpbk_mode != 0) {
5347                 if (hw->mac.type == ixgbe_mac_82599EB)
5348                         ixgbe_setup_loopback_link_82599(hw);
5349                 else if (hw->mac.type == ixgbe_mac_X540 ||
5350                      hw->mac.type == ixgbe_mac_X550 ||
5351                      hw->mac.type == ixgbe_mac_X550EM_x ||
5352                      hw->mac.type == ixgbe_mac_X550EM_a)
5353                         ixgbe_setup_loopback_link_x540_x550(hw, true);
5354         }
5355
5356 #ifdef RTE_LIB_SECURITY
5357         if ((dev->data->dev_conf.rxmode.offloads &
5358                         DEV_RX_OFFLOAD_SECURITY) ||
5359                 (dev->data->dev_conf.txmode.offloads &
5360                         DEV_TX_OFFLOAD_SECURITY)) {
5361                 ret = ixgbe_crypto_enable_ipsec(dev);
5362                 if (ret != 0) {
5363                         PMD_DRV_LOG(ERR,
5364                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5365                                     ret);
5366                         return ret;
5367                 }
5368         }
5369 #endif
5370
5371         return 0;
5372 }
5373
5374 /*
5375  * Start Receive Units for specified queue.
5376  */
5377 int __rte_cold
5378 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5379 {
5380         struct ixgbe_hw     *hw;
5381         struct ixgbe_rx_queue *rxq;
5382         uint32_t rxdctl;
5383         int poll_ms;
5384
5385         PMD_INIT_FUNC_TRACE();
5386         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5387
5388         rxq = dev->data->rx_queues[rx_queue_id];
5389
5390         /* Allocate buffers for descriptor rings */
5391         if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5392                 PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5393                              rx_queue_id);
5394                 return -1;
5395         }
5396         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5397         rxdctl |= IXGBE_RXDCTL_ENABLE;
5398         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5399
5400         /* Wait until RX Enable ready */
5401         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5402         do {
5403                 rte_delay_ms(1);
5404                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5405         } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5406         if (!poll_ms)
5407                 PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5408         rte_wmb();
5409         IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5410         IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5411         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5412
5413         return 0;
5414 }
5415
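/*
 * Illustrative sketch, not driver code: a queue marked as deferred at setup
 * time is skipped by ixgbe_dev_rxtx_start() and must be started explicitly
 * through the ethdev API, which lands in the function above. "port_id" and
 * "mb_pool" are assumed placeholders; a real application would normally
 * start from dev_info.default_rxconf instead of a zeroed rxconf.
 *
 *     struct rte_eth_rxconf rx_conf = { 0 };
 *
 *     rx_conf.rx_deferred_start = 1;
 *     rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                            &rx_conf, mb_pool);
 *     rte_eth_dev_start(port_id);
 *     rte_eth_dev_rx_queue_start(port_id, 0);
 */
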
5416 /*
5417  * Stop Receive Units for specified queue.
5418  */
5419 int __rte_cold
5420 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5421 {
5422         struct ixgbe_hw     *hw;
5423         struct ixgbe_adapter *adapter = dev->data->dev_private;
5424         struct ixgbe_rx_queue *rxq;
5425         uint32_t rxdctl;
5426         int poll_ms;
5427
5428         PMD_INIT_FUNC_TRACE();
5429         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5430
5431         rxq = dev->data->rx_queues[rx_queue_id];
5432
5433         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5434         rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5435         IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5436
5437         /* Wait until RX Enable bit clear */
5438         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5439         do {
5440                 rte_delay_ms(1);
5441                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5442         } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5443         if (!poll_ms)
5444                 PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5445
5446         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5447
5448         ixgbe_rx_queue_release_mbufs(rxq);
5449         ixgbe_reset_rx_queue(adapter, rxq);
5450         dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5451
5452         return 0;
5453 }
5454
5456 /*
5457  * Start Transmit Units for specified queue.
5458  */
5459 int __rte_cold
5460 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5461 {
5462         struct ixgbe_hw     *hw;
5463         struct ixgbe_tx_queue *txq;
5464         uint32_t txdctl;
5465         int poll_ms;
5466
5467         PMD_INIT_FUNC_TRACE();
5468         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5469
5470         txq = dev->data->tx_queues[tx_queue_id];
5471         IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5472         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5473         txdctl |= IXGBE_TXDCTL_ENABLE;
5474         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5475
5476         /* Wait until TX Enable ready */
5477         if (hw->mac.type == ixgbe_mac_82599EB) {
5478                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5479                 do {
5480                         rte_delay_ms(1);
5481                         txdctl = IXGBE_READ_REG(hw,
5482                                 IXGBE_TXDCTL(txq->reg_idx));
5483                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5484                 if (!poll_ms)
5485                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5486                                 tx_queue_id);
5487         }
5488         rte_wmb();
5489         IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5490         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5491
5492         return 0;
5493 }
5494
5495 /*
5496  * Stop Transmit Units for specified queue.
5497  */
5498 int __rte_cold
5499 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5500 {
5501         struct ixgbe_hw     *hw;
5502         struct ixgbe_tx_queue *txq;
5503         uint32_t txdctl;
5504         uint32_t txtdh, txtdt;
5505         int poll_ms;
5506
5507         PMD_INIT_FUNC_TRACE();
5508         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5509
5510         txq = dev->data->tx_queues[tx_queue_id];
5511
5512         /* Wait until TX queue is empty */
5513         if (hw->mac.type == ixgbe_mac_82599EB) {
5514                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5515                 do {
5516                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5517                         txtdh = IXGBE_READ_REG(hw,
5518                                                IXGBE_TDH(txq->reg_idx));
5519                         txtdt = IXGBE_READ_REG(hw,
5520                                                IXGBE_TDT(txq->reg_idx));
5521                 } while (--poll_ms && (txtdh != txtdt));
5522                 if (!poll_ms)
5523                         PMD_INIT_LOG(ERR,
5524                                 "Tx Queue %d is not empty when stopping.",
5525                                 tx_queue_id);
5526         }
5527
5528         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5529         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5530         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5531
5532         /* Wait until TX Enable bit clear */
5533         if (hw->mac.type == ixgbe_mac_82599EB) {
5534                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5535                 do {
5536                         rte_delay_ms(1);
5537                         txdctl = IXGBE_READ_REG(hw,
5538                                                 IXGBE_TXDCTL(txq->reg_idx));
5539                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5540                 if (!poll_ms)
5541                         PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5542                                 tx_queue_id);
5543         }
5544
5545         if (txq->ops != NULL) {
5546                 txq->ops->release_mbufs(txq);
5547                 txq->ops->reset(txq);
5548         }
5549         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5550
5551         return 0;
5552 }
5553
5554 void
5555 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5556         struct rte_eth_rxq_info *qinfo)
5557 {
5558         struct ixgbe_rx_queue *rxq;
5559
5560         rxq = dev->data->rx_queues[queue_id];
5561
5562         qinfo->mp = rxq->mb_pool;
5563         qinfo->scattered_rx = dev->data->scattered_rx;
5564         qinfo->nb_desc = rxq->nb_rx_desc;
5565
5566         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5567         qinfo->conf.rx_drop_en = rxq->drop_en;
5568         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5569         qinfo->conf.offloads = rxq->offloads;
5570 }
5571
5572 void
5573 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5574         struct rte_eth_txq_info *qinfo)
5575 {
5576         struct ixgbe_tx_queue *txq;
5577
5578         txq = dev->data->tx_queues[queue_id];
5579
5580         qinfo->nb_desc = txq->nb_tx_desc;
5581
5582         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5583         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5584         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5585
5586         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5587         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5588         qinfo->conf.offloads = txq->offloads;
5589         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5590 }
5591
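/*
 * Illustrative sketch, not driver code: the two helpers above serve
 * rte_eth_rx_queue_info_get() and rte_eth_tx_queue_info_get(). A caller
 * might use them as follows, with "port_id" as an assumed placeholder
 * (stdio.h is already included in this file).
 *
 *     struct rte_eth_rxq_info rx_info;
 *
 *     if (rte_eth_rx_queue_info_get(port_id, 0, &rx_info) == 0)
 *             printf("rxq0: %u descriptors, drop_en=%u\n",
 *                    rx_info.nb_desc, rx_info.conf.rx_drop_en);
 */
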
5592 /*
5593  * [VF] Initializes Receive Unit.
5594  */
5595 int __rte_cold
5596 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5597 {
5598         struct ixgbe_hw     *hw;
5599         struct ixgbe_rx_queue *rxq;
5600         struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5601         uint64_t bus_addr;
5602         uint32_t srrctl, psrtype = 0;
5603         uint16_t buf_size;
5604         uint16_t i;
5605         int ret;
5606
5607         PMD_INIT_FUNC_TRACE();
5608         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5609
5610         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5611                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5612                         "it must be a power of 2");
5613                 return -1;
5614         }
5615
5616         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5617                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5618                         "it must be less than or equal to %d",
5619                         hw->mac.max_rx_queues);
5620                 return -1;
5621         }
5622
5623         /*
5624          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5625          * disables VF packet receipt if the PF MTU is > 1500.
5626          * This is done to deal with the 82599 limitation that forces
5627          * the PF and all VFs to share the same MTU.
5628          * The PF driver re-enables VF packet receipt only when the VF driver
5629          * issues an IXGBE_VF_SET_LPE request.
5630          * In the meantime, the VF device cannot be used, even if the VF driver
5631          * and the Guest VM network stack are ready to accept packets with a
5632          * size up to the PF MTU.
5633          * As a workaround for this PF behaviour, always call
5634          * ixgbevf_rlpml_set_vf, even when jumbo frames are not used, so that
5635          * VF packet reception works in all cases.
5636          */
5637         ixgbevf_rlpml_set_vf(hw,
5638                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5639
5640         /*
5641          * Assume no header split and no VLAN strip support
5642          * on any Rx queue first .
5643          */
5644         rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5645         /* Setup RX queues */
5646         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5647                 rxq = dev->data->rx_queues[i];
5648
5649                 /* Allocate buffers for descriptor rings */
5650                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5651                 if (ret)
5652                         return ret;
5653
5654                 /* Setup the Base and Length of the Rx Descriptor Rings */
5655                 bus_addr = rxq->rx_ring_phys_addr;
5656
5657                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5658                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5659                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5660                                 (uint32_t)(bus_addr >> 32));
5661                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5662                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5663                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5664                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5665
5667                 /* Configure the SRRCTL register */
5668                 srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5669
5670                 /* Set if packets are dropped when no descriptors available */
5671                 if (rxq->drop_en)
5672                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5673
5674                 /*
5675                  * Configure the RX buffer size in the BSIZEPACKET field of
5676                  * the SRRCTL register of the queue.
5677                  * The value is in 1 KB resolution. Valid values can be from
5678                  * 1 KB to 16 KB.
5679                  */
5680                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5681                         RTE_PKTMBUF_HEADROOM);
5682                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5683                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5684
5685                 /*
5686                  * VF modification to write virtual function SRRCTL register
5687                  */
5688                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5689
5690                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5691                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5692
5693                 if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5694                     /* Account for two VLAN tags (QinQ) when checking the buffer size */
5695                     (rxmode->max_rx_pkt_len +
5696                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5697                         if (!dev->data->scattered_rx)
5698                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5699                         dev->data->scattered_rx = 1;
5700                 }
5701
5702                 if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5703                         rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5704         }
5705
5706         /* Set PSRTYPE.RQPL for VF RSS according to the number of Rx queues */
5707         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5708                 IXGBE_PSRTYPE_RQPL_SHIFT;
5709         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5710
5711         ixgbe_set_rx_function(dev);
5712
5713         return 0;
5714 }
5715
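/*
 * Illustrative sketch, not driver code: the VF Rx path above requires the
 * Rx queue count to be a power of 2 and no larger than the PF-advertised
 * maximum; with, say, 4 Rx queues the function writes (4 >> 1) = 2 into
 * the RQPL field of VFPSRTYPE. "port_id" and "conf" are assumed
 * placeholders.
 *
 *     uint16_t nb_rxq = 4;
 *     int ret;
 *
 *     ret = rte_eth_dev_configure(port_id, nb_rxq, nb_rxq, &conf);
 *     if (ret == 0)
 *             ret = rte_eth_dev_start(port_id);
 */
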
5716 /*
5717  * [VF] Initializes Transmit Unit.
5718  */
5719 void __rte_cold
5720 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5721 {
5722         struct ixgbe_hw     *hw;
5723         struct ixgbe_tx_queue *txq;
5724         uint64_t bus_addr;
5725         uint32_t txctrl;
5726         uint16_t i;
5727
5728         PMD_INIT_FUNC_TRACE();
5729         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5730
5731         /* Setup the Base and Length of the Tx Descriptor Rings */
5732         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5733                 txq = dev->data->tx_queues[i];
5734                 bus_addr = txq->tx_ring_phys_addr;
5735                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5736                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5737                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5738                                 (uint32_t)(bus_addr >> 32));
5739                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5740                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5741                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5742                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5743                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5744
5745                 /*
5746                  * Disable the Tx Head Writeback relaxed ordering (RO) bit;
5747                  * out-of-order write-backs would corrupt descriptor bookkeeping.
5748                  */
5749                 txctrl = IXGBE_READ_REG(hw,
5750                                 IXGBE_VFDCA_TXCTRL(i));
5751                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5752                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5753                                 txctrl);
5754         }
5755 }
5756
5757 /*
5758  * [VF] Start Transmit and Receive Units.
5759  */
5760 void __rte_cold
5761 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5762 {
5763         struct ixgbe_hw     *hw;
5764         struct ixgbe_tx_queue *txq;
5765         struct ixgbe_rx_queue *rxq;
5766         uint32_t txdctl;
5767         uint32_t rxdctl;
5768         uint16_t i;
5769         int poll_ms;
5770
5771         PMD_INIT_FUNC_TRACE();
5772         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5773
5774         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5775                 txq = dev->data->tx_queues[i];
5776                 /* Setup Transmit Threshold Registers */
5777                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5778                 txdctl |= txq->pthresh & 0x7F;
5779                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5780                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5781                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5782         }
5783
5784         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5785
5786                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5787                 txdctl |= IXGBE_TXDCTL_ENABLE;
5788                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5789
5790                 poll_ms = 10;
5791                 /* Wait until TX Enable ready */
5792                 do {
5793                         rte_delay_ms(1);
5794                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5795                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5796                 if (!poll_ms)
5797                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5798         }
5799         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5800
5801                 rxq = dev->data->rx_queues[i];
5802
5803                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5804                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5805                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5806
5807                 /* Wait until RX Enable ready */
5808                 poll_ms = 10;
5809                 do {
5810                         rte_delay_ms(1);
5811                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5812                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5813                 if (!poll_ms)
5814                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5815                 rte_wmb();
5816                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5817
5818         }
5819 }
5820
5821 int
5822 ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5823                     const struct rte_flow_action_rss *in)
5824 {
5825         if (in->key_len > RTE_DIM(out->key) ||
5826             in->queue_num > RTE_DIM(out->queue))
5827                 return -EINVAL;
5828         out->conf = (struct rte_flow_action_rss){
5829                 .func = in->func,
5830                 .level = in->level,
5831                 .types = in->types,
5832                 .key_len = in->key_len,
5833                 .queue_num = in->queue_num,
5834                 .key = memcpy(out->key, in->key, in->key_len),
5835                 .queue = memcpy(out->queue, in->queue,
5836                                 sizeof(*in->queue) * in->queue_num),
5837         };
5838         return 0;
5839 }
5840
5841 int
5842 ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5843                       const struct rte_flow_action_rss *with)
5844 {
5845         return (comp->func == with->func &&
5846                 comp->level == with->level &&
5847                 comp->types == with->types &&
5848                 comp->key_len == with->key_len &&
5849                 comp->queue_num == with->queue_num &&
5850                 !memcmp(comp->key, with->key, with->key_len) &&
5851                 !memcmp(comp->queue, with->queue,
5852                         sizeof(*with->queue) * with->queue_num));
5853 }
5854
5855 int
5856 ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5857                 struct ixgbe_rte_flow_rss_conf *conf, bool add)
5858 {
5859         struct ixgbe_hw *hw;
5860         uint32_t reta;
5861         uint16_t i;
5862         uint16_t j;
5863         uint16_t sp_reta_size;
5864         uint32_t reta_reg;
5865         struct rte_eth_rss_conf rss_conf = {
5866                 .rss_key = conf->conf.key_len ?
5867                         (void *)(uintptr_t)conf->conf.key : NULL,
5868                 .rss_key_len = conf->conf.key_len,
5869                 .rss_hf = conf->conf.types,
5870         };
5871         struct ixgbe_filter_info *filter_info =
5872                 IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5873
5874         PMD_INIT_FUNC_TRACE();
5875         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5876
5877         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5878
5879         if (!add) {
5880                 if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5881                                           &conf->conf)) {
5882                         ixgbe_rss_disable(dev);
5883                         memset(&filter_info->rss_info, 0,
5884                                 sizeof(struct ixgbe_rte_flow_rss_conf));
5885                         return 0;
5886                 }
5887                 return -EINVAL;
5888         }
5889
5890         if (filter_info->rss_info.conf.queue_num)
5891                 return -EINVAL;
5892         /* Fill in the redirection table.
5893          * The byte-swap is needed because NIC registers are in
5894          * little-endian order.
5895          */
5896         reta = 0;
5897         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5898                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5899
5900                 if (j == conf->conf.queue_num)
5901                         j = 0;
5902                 reta = (reta << 8) | conf->conf.queue[j];
5903                 if ((i & 3) == 3)
5904                         IXGBE_WRITE_REG(hw, reta_reg,
5905                                         rte_bswap32(reta));
5906         }
5907
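        /*
         * Worked example (queue list assumed): for a two-queue RSS action
         * with queues { 0, 1 }, the first four iterations accumulate the
         * entries most-significant-byte first,
         *
         *     reta = 0x00010001
         *
         * and rte_bswap32() stores 0x01000100 so that entry 0 ends up in
         * bits 7:0 of the little-endian RETA register.
         */
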
5908         /* Configure the RSS key and the RSS protocols used to compute
5909          * the RSS hash of input packets.
5910          */
5911         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5912                 ixgbe_rss_disable(dev);
5913                 return 0;
5914         }
5915         if (rss_conf.rss_key == NULL)
5916                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
5917         ixgbe_hw_rss_hash_set(hw, &rss_conf);
5918
5919         if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5920                 return -EINVAL;
5921
5922         return 0;
5923 }
5924
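/*
 * Illustrative sketch of how the helpers above fit together, mirroring the
 * driver's flow code rather than prescribing an implementation. "dev" is an
 * assumed, already-initialized device; rss_intel_key is the default key
 * defined earlier in this file.
 *
 *     static const uint16_t queues[2] = { 0, 1 };
 *     struct rte_flow_action_rss act = {
 *             .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *             .types = ETH_RSS_IP,
 *             .key_len = 40,
 *             .key = rss_intel_key,
 *             .queue_num = 2,
 *             .queue = queues,
 *     };
 *     struct ixgbe_rte_flow_rss_conf conf = { 0 };
 *     int ret;
 *
 *     ret = ixgbe_rss_conf_init(&conf, &act);
 *     if (ret == 0)
 *             ret = ixgbe_config_rss_filter(dev, &conf, true);
 *
 * Passing false with the same configuration later removes the rule again.
 */
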
5925 /* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5926 #if defined(RTE_ARCH_PPC_64)
5927 int
5928 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5929 {
5930         return -1;
5931 }
5932
5933 uint16_t
5934 ixgbe_recv_pkts_vec(
5935         void __rte_unused *rx_queue,
5936         struct rte_mbuf __rte_unused **rx_pkts,
5937         uint16_t __rte_unused nb_pkts)
5938 {
5939         return 0;
5940 }
5941
5942 uint16_t
5943 ixgbe_recv_scattered_pkts_vec(
5944         void __rte_unused *rx_queue,
5945         struct rte_mbuf __rte_unused **rx_pkts,
5946         uint16_t __rte_unused nb_pkts)
5947 {
5948         return 0;
5949 }
5950
5951 int
5952 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5953 {
5954         return -1;
5955 }
5956
5957 uint16_t
5958 ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5959                 struct rte_mbuf __rte_unused **tx_pkts,
5960                 uint16_t __rte_unused nb_pkts)
5961 {
5962         return 0;
5963 }
5964
5965 int
5966 ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5967 {
5968         return -1;
5969 }
5970
5971 void
5972 ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5973 {
5974         return;
5975 }
5976 #endif