drivers/net: fix number of segment storage type
dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building a TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 PKT_TX_SEC_OFFLOAD |             \
97                 IXGBE_TX_IEEE1588_TMST)
98
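/*
 * Any ol_flags bit outside IXGBE_TX_OFFLOAD_MASK is not supported by this
 * driver; ixgbe_prep_pkts() below tests packets against
 * IXGBE_TX_OFFLOAD_NOTSUP_MASK and rejects them before they reach the
 * transmit path.
 */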
99 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
100                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
101
102 #if 1
103 #define RTE_PMD_USE_PREFETCH
104 #endif
105
106 #ifdef RTE_PMD_USE_PREFETCH
107 /*
108  * Prefetch a cache line into all cache levels.
109  */
110 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
111 #else
112 #define rte_ixgbe_prefetch(p)   do {} while (0)
113 #endif
114
115 #ifdef RTE_IXGBE_INC_VECTOR
116 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
117                                     uint16_t nb_pkts);
118 #endif
119
120 /*********************************************************************
121  *
122  *  TX functions
123  *
124  **********************************************************************/
125
126 /*
127  * Check for descriptors with their DD bit set and free mbufs.
128  * Return the total number of buffers freed.
129  */
130 static __rte_always_inline int
131 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
132 {
133         struct ixgbe_tx_entry *txep;
134         uint32_t status;
135         int i, nb_free = 0;
136         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
137
138         /* check DD bit on threshold descriptor */
139         status = txq->tx_ring[txq->tx_next_dd].wb.status;
140         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
141                 return 0;
142
143         /*
144          * first buffer to free from S/W ring is at index
145          * tx_next_dd - (tx_rs_thresh-1)
146          */
147         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
148
149         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
150                 /* free buffers one at a time */
151                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
152                 txep->mbuf = NULL;
153
154                 if (unlikely(m == NULL))
155                         continue;
156
157                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
158                     (nb_free > 0 && m->pool != free[0]->pool)) {
159                         rte_mempool_put_bulk(free[0]->pool,
160                                              (void **)free, nb_free);
161                         nb_free = 0;
162                 }
163
164                 free[nb_free++] = m;
165         }
166
167         if (nb_free > 0)
168                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
169
170         /* buffers were freed, update counters */
171         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
172         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
173         if (txq->tx_next_dd >= txq->nb_tx_desc)
174                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
175
176         return txq->tx_rs_thresh;
177 }
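/*
 * Note on the loop above: freed mbufs are returned to their mempool in
 * bulk.  A new bulk is started whenever the scratch array fills up or the
 * next mbuf belongs to a different mempool than the one currently being
 * batched, so rings holding mbufs from mixed pools are still handled
 * correctly.
 */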
178
179 /* Populate 4 descriptors with data from 4 mbufs */
180 static inline void
181 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
182 {
183         uint64_t buf_dma_addr;
184         uint32_t pkt_len;
185         int i;
186
187         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
188                 buf_dma_addr = rte_mbuf_data_iova(*pkts);
189                 pkt_len = (*pkts)->data_len;
190
191                 /* write data to descriptor */
192                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
193
194                 txdp->read.cmd_type_len =
195                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
196
197                 txdp->read.olinfo_status =
198                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
199
200                 rte_prefetch0(&(*pkts)->pool);
201         }
202 }
203
204 /* Populate 1 descriptor with data from 1 mbuf */
205 static inline void
206 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
207 {
208         uint64_t buf_dma_addr;
209         uint32_t pkt_len;
210
211         buf_dma_addr = rte_mbuf_data_iova(*pkts);
212         pkt_len = (*pkts)->data_len;
213
214         /* write data to descriptor */
215         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
216         txdp->read.cmd_type_len =
217                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
218         txdp->read.olinfo_status =
219                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
220         rte_prefetch0(&(*pkts)->pool);
221 }
222
223 /*
224  * Fill H/W descriptor ring with mbuf data.
225  * Copy mbuf pointers to the S/W ring.
226  */
227 static inline void
228 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
229                       uint16_t nb_pkts)
230 {
231         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
232         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
233         const int N_PER_LOOP = 4;
234         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
235         int mainpart, leftover;
236         int i, j;
237
238         /*
239          * Process most of the packets in chunks of N pkts.  Any
240          * leftover packets will get processed one at a time.
241          */
242         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
243         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
244         for (i = 0; i < mainpart; i += N_PER_LOOP) {
245                 /* Copy N mbuf pointers to the S/W ring */
246                 for (j = 0; j < N_PER_LOOP; ++j) {
247                         (txep + i + j)->mbuf = *(pkts + i + j);
248                 }
249                 tx4(txdp + i, pkts + i);
250         }
251
252         if (unlikely(leftover > 0)) {
253                 for (i = 0; i < leftover; ++i) {
254                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
255                         tx1(txdp + mainpart + i, pkts + mainpart + i);
256                 }
257         }
258 }
259
260 static inline uint16_t
261 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
262              uint16_t nb_pkts)
263 {
264         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
265         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
266         uint16_t n = 0;
267
268         /*
269          * Begin scanning the H/W ring for done descriptors when the
270          * number of available descriptors drops below tx_free_thresh.  For
271          * each done descriptor, free the associated buffer.
272          */
273         if (txq->nb_tx_free < txq->tx_free_thresh)
274                 ixgbe_tx_free_bufs(txq);
275
276         /* Only use descriptors that are available */
277         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
278         if (unlikely(nb_pkts == 0))
279                 return 0;
280
281         /* Use exactly nb_pkts descriptors */
282         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
283
284         /*
285          * At this point, we know there are enough descriptors in the
286          * ring to transmit all the packets.  This assumes that each
287          * mbuf contains a single segment, and that no new offloads
288          * are expected, which would require a new context descriptor.
289          */
290
291         /*
292          * See if we're going to wrap-around. If so, handle the top
293          * of the descriptor ring first, then do the bottom.  If not,
294          * the processing looks just like the "bottom" part anyway...
295          */
296         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
297                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
298                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
299
300                 /*
301                  * We know that the last descriptor in the ring will need to
302                  * have its RS bit set because tx_rs_thresh has to be
303                  * a divisor of the ring size
304                  */
305                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
306                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
307                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
308
309                 txq->tx_tail = 0;
310         }
311
312         /* Fill H/W descriptor ring with mbuf data */
313         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
314         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
315
316         /*
317          * Determine if RS bit should be set
318          * This is what we actually want:
319          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
320          * but instead of subtracting 1 and doing >=, we can just do
321          * greater than without subtracting.
322          */
323         if (txq->tx_tail > txq->tx_next_rs) {
324                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
325                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
326                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
327                                                 txq->tx_rs_thresh);
328                 if (txq->tx_next_rs >= txq->nb_tx_desc)
329                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
330         }
331
332         /*
333          * Check for wrap-around. This would only happen if we used
334          * up to the last descriptor in the ring, no more, no less.
335          */
336         if (txq->tx_tail >= txq->nb_tx_desc)
337                 txq->tx_tail = 0;
338
339         /* update tail pointer */
340         rte_wmb();
341         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
342
343         return nb_pkts;
344 }
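/*
 * Illustrative walk-through of tx_xmit_pkts() with assumed values (not a
 * real configuration): nb_tx_desc = 128, tx_rs_thresh = 32, tx_tail = 120
 * and a burst of nb_pkts = 16.  Since 120 + 16 > 128, the first 8
 * descriptors are filled at the top of the ring, the RS bit is set on the
 * last ring entry, tx_tail wraps to 0, and the remaining 8 packets are
 * filled from the start of the ring before the tail register is updated.
 */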
345
346 uint16_t
347 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
348                        uint16_t nb_pkts)
349 {
350         uint16_t nb_tx;
351
352         /* Transmit directly when the whole burst fits within TX_MAX_BURST pkts */
353         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
354                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
355
356         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
357         nb_tx = 0;
358         while (nb_pkts) {
359                 uint16_t ret, n;
360
361                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
362                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
363                 nb_tx = (uint16_t)(nb_tx + ret);
364                 nb_pkts = (uint16_t)(nb_pkts - ret);
365                 if (ret < n)
366                         break;
367         }
368
369         return nb_tx;
370 }
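/*
 * Illustrative usage sketch (port_id/queue_id are placeholders): when the
 * PMD selects this simple TX path, applications still reach it through the
 * generic ethdev burst API, e.g.:
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t sent;
 *     // ... allocate and fill pkts[] with single-segment mbufs ...
 *     sent = rte_eth_tx_burst(port_id, queue_id, pkts, 32);
 *
 * Bursts larger than RTE_PMD_IXGBE_TX_MAX_BURST are split into chunks by
 * the loop above, so callers may pass any burst size.
 */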
371
372 #ifdef RTE_IXGBE_INC_VECTOR
373 static uint16_t
374 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
375                     uint16_t nb_pkts)
376 {
377         uint16_t nb_tx = 0;
378         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
379
380         while (nb_pkts) {
381                 uint16_t ret, num;
382
383                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
384                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
385                                                  num);
386                 nb_tx += ret;
387                 nb_pkts -= ret;
388                 if (ret < num)
389                         break;
390         }
391
392         return nb_tx;
393 }
394 #endif
395
396 static inline void
397 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
398                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
399                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
400                 __rte_unused uint64_t *mdata)
401 {
402         uint32_t type_tucmd_mlhl;
403         uint32_t mss_l4len_idx = 0;
404         uint32_t ctx_idx;
405         uint32_t vlan_macip_lens;
406         union ixgbe_tx_offload tx_offload_mask;
407         uint32_t seqnum_seed = 0;
408
409         ctx_idx = txq->ctx_curr;
410         tx_offload_mask.data[0] = 0;
411         tx_offload_mask.data[1] = 0;
412         type_tucmd_mlhl = 0;
413
414         /* Specify which HW CTX to upload. */
415         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
416
417         if (ol_flags & PKT_TX_VLAN_PKT) {
418                 tx_offload_mask.vlan_tci |= ~0;
419         }
420
421         /* check if TCP segmentation is required for this packet */
422         if (ol_flags & PKT_TX_TCP_SEG) {
423                 /* implies IP cksum in IPv4 */
424                 if (ol_flags & PKT_TX_IP_CKSUM)
425                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
426                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
427                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
428                 else
429                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
430                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
431                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
432
433                 tx_offload_mask.l2_len |= ~0;
434                 tx_offload_mask.l3_len |= ~0;
435                 tx_offload_mask.l4_len |= ~0;
436                 tx_offload_mask.tso_segsz |= ~0;
437                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
438                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
439         } else { /* no TSO, check if hardware checksum is needed */
440                 if (ol_flags & PKT_TX_IP_CKSUM) {
441                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
442                         tx_offload_mask.l2_len |= ~0;
443                         tx_offload_mask.l3_len |= ~0;
444                 }
445
446                 switch (ol_flags & PKT_TX_L4_MASK) {
447                 case PKT_TX_UDP_CKSUM:
448                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
449                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
450                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
451                         tx_offload_mask.l2_len |= ~0;
452                         tx_offload_mask.l3_len |= ~0;
453                         break;
454                 case PKT_TX_TCP_CKSUM:
455                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
456                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
457                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
458                         tx_offload_mask.l2_len |= ~0;
459                         tx_offload_mask.l3_len |= ~0;
460                         break;
461                 case PKT_TX_SCTP_CKSUM:
462                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
463                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
464                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
465                         tx_offload_mask.l2_len |= ~0;
466                         tx_offload_mask.l3_len |= ~0;
467                         break;
468                 default:
469                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
470                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
471                         break;
472                 }
473         }
474
475         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
476                 tx_offload_mask.outer_l2_len |= ~0;
477                 tx_offload_mask.outer_l3_len |= ~0;
478                 tx_offload_mask.l2_len |= ~0;
479                 seqnum_seed |= tx_offload.outer_l3_len
480                                << IXGBE_ADVTXD_OUTER_IPLEN;
481                 seqnum_seed |= tx_offload.l2_len
482                                << IXGBE_ADVTXD_TUNNEL_LEN;
483         }
484 #ifdef RTE_LIBRTE_SECURITY
485         if (ol_flags & PKT_TX_SEC_OFFLOAD) {
486                 union ixgbe_crypto_tx_desc_md *md =
487                                 (union ixgbe_crypto_tx_desc_md *)mdata;
488                 seqnum_seed |=
489                         (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
490                 type_tucmd_mlhl |= md->enc ?
491                                 (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
492                                 IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
493                 type_tucmd_mlhl |=
494                         (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
495                 tx_offload_mask.sa_idx |= ~0;
496                 tx_offload_mask.sec_pad_len |= ~0;
497         }
498 #endif
499
500         txq->ctx_cache[ctx_idx].flags = ol_flags;
501         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
502                 tx_offload_mask.data[0] & tx_offload.data[0];
503         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
504                 tx_offload_mask.data[1] & tx_offload.data[1];
505         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
506
507         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
508         vlan_macip_lens = tx_offload.l3_len;
509         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
510                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
511                                     IXGBE_ADVTXD_MACLEN_SHIFT);
512         else
513                 vlan_macip_lens |= (tx_offload.l2_len <<
514                                     IXGBE_ADVTXD_MACLEN_SHIFT);
515         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
516         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
517         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
518         ctx_txd->seqnum_seed     = seqnum_seed;
519 }
520
521 /*
522  * Check which hardware context can be used. Use the existing match
523  * or create a new context descriptor.
524  */
525 static inline uint32_t
526 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
527                    union ixgbe_tx_offload tx_offload)
528 {
529         /* If it matches the currently used context */
530         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
531                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
532                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
533                      & tx_offload.data[0])) &&
534                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
535                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
536                      & tx_offload.data[1]))))
537                 return txq->ctx_curr;
538
539         /* Otherwise, check whether it matches the other cached context */
540         txq->ctx_curr ^= 1;
541         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
542                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
543                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
544                      & tx_offload.data[0])) &&
545                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
546                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
547                      & tx_offload.data[1]))))
548                 return txq->ctx_curr;
549
550         /* Mismatch with both cached contexts: signal the caller to build a new one */
551         return IXGBE_CTX_NUM;
552 }
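/*
 * The driver caches two TX contexts per queue (ctx_cache[], toggled via
 * ctx_curr above).  A return value of IXGBE_CTX_NUM means neither cached
 * slot matches, and ixgbe_xmit_pkts() will emit a fresh context descriptor
 * into the slot now selected by txq->ctx_curr.
 */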
553
554 static inline uint32_t
555 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
556 {
557         uint32_t tmp = 0;
558
559         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
560                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
561         if (ol_flags & PKT_TX_IP_CKSUM)
562                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
563         if (ol_flags & PKT_TX_TCP_SEG)
564                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
565         return tmp;
566 }
567
568 static inline uint32_t
569 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
570 {
571         uint32_t cmdtype = 0;
572
573         if (ol_flags & PKT_TX_VLAN_PKT)
574                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
575         if (ol_flags & PKT_TX_TCP_SEG)
576                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
577         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
578                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
579         if (ol_flags & PKT_TX_MACSEC)
580                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
581         return cmdtype;
582 }
583
584 /* Default RS bit threshold values */
585 #ifndef DEFAULT_TX_RS_THRESH
586 #define DEFAULT_TX_RS_THRESH   32
587 #endif
588 #ifndef DEFAULT_TX_FREE_THRESH
589 #define DEFAULT_TX_FREE_THRESH 32
590 #endif
591
592 /* Reset transmit descriptors after they have been used */
593 static inline int
594 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
595 {
596         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
597         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
598         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
599         uint16_t nb_tx_desc = txq->nb_tx_desc;
600         uint16_t desc_to_clean_to;
601         uint16_t nb_tx_to_clean;
602         uint32_t status;
603
604         /* Determine the last descriptor needing to be cleaned */
605         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
606         if (desc_to_clean_to >= nb_tx_desc)
607                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
608
609         /* Check to make sure the last descriptor to clean is done */
610         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
611         status = txr[desc_to_clean_to].wb.status;
612         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
613                 PMD_TX_FREE_LOG(DEBUG,
614                                 "TX descriptor %4u is not done "
615                                 "(port=%d queue=%d)",
616                                 desc_to_clean_to,
617                                 txq->port_id, txq->queue_id);
618                 /* Failed to clean any descriptors, better luck next time */
619                 return -(1);
620         }
621
622         /* Figure out how many descriptors will be cleaned */
623         if (last_desc_cleaned > desc_to_clean_to)
624                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
625                                                         desc_to_clean_to);
626         else
627                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
628                                                 last_desc_cleaned);
629
630         PMD_TX_FREE_LOG(DEBUG,
631                         "Cleaning %4u TX descriptors: %4u to %4u "
632                         "(port=%d queue=%d)",
633                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
634                         txq->port_id, txq->queue_id);
635
636         /*
637          * The last descriptor to clean is done, so that means all the
638          * descriptors from the last descriptor that was cleaned
639          * up to the last descriptor with the RS bit set
640          * are done. Only reset the threshold descriptor.
641          */
642         txr[desc_to_clean_to].wb.status = 0;
643
644         /* Update the txq to reflect the last descriptor that was cleaned */
645         txq->last_desc_cleaned = desc_to_clean_to;
646         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
647
648         /* No Error */
649         return 0;
650 }
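/*
 * Worked example for the wrap-around arithmetic above (ring size assumed
 * for illustration): with nb_tx_desc = 512, tx_rs_thresh = 32,
 * last_desc_cleaned = 500 and single-segment packets (so last_id equals
 * the descriptor index), the target is (500 + 32) - 512 = 20.  If
 * descriptor 20 has its DD bit set, (512 - 500) + 20 = 32 descriptors are
 * reclaimed and nb_tx_free grows by 32.
 */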
651
652 uint16_t
653 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
654                 uint16_t nb_pkts)
655 {
656         struct ixgbe_tx_queue *txq;
657         struct ixgbe_tx_entry *sw_ring;
658         struct ixgbe_tx_entry *txe, *txn;
659         volatile union ixgbe_adv_tx_desc *txr;
660         volatile union ixgbe_adv_tx_desc *txd, *txp;
661         struct rte_mbuf     *tx_pkt;
662         struct rte_mbuf     *m_seg;
663         uint64_t buf_dma_addr;
664         uint32_t olinfo_status;
665         uint32_t cmd_type_len;
666         uint32_t pkt_len;
667         uint16_t slen;
668         uint64_t ol_flags;
669         uint16_t tx_id;
670         uint16_t tx_last;
671         uint16_t nb_tx;
672         uint16_t nb_used;
673         uint64_t tx_ol_req;
674         uint32_t ctx = 0;
675         uint32_t new_ctx;
676         union ixgbe_tx_offload tx_offload;
677 #ifdef RTE_LIBRTE_SECURITY
678         uint8_t use_ipsec;
679 #endif
680
681         tx_offload.data[0] = 0;
682         tx_offload.data[1] = 0;
683         txq = tx_queue;
684         sw_ring = txq->sw_ring;
685         txr     = txq->tx_ring;
686         tx_id   = txq->tx_tail;
687         txe = &sw_ring[tx_id];
688         txp = NULL;
689
690         /* Determine if the descriptor ring needs to be cleaned. */
691         if (txq->nb_tx_free < txq->tx_free_thresh)
692                 ixgbe_xmit_cleanup(txq);
693
694         rte_prefetch0(&txe->mbuf->pool);
695
696         /* TX loop */
697         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
698                 new_ctx = 0;
699                 tx_pkt = *tx_pkts++;
700                 pkt_len = tx_pkt->pkt_len;
701
702                 /*
703                  * Determine how many (if any) context descriptors
704                  * are needed for offload functionality.
705                  */
706                 ol_flags = tx_pkt->ol_flags;
707 #ifdef RTE_LIBRTE_SECURITY
708                 use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
709 #endif
710
711                 /* If hardware offload is required */
712                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
713                 if (tx_ol_req) {
714                         tx_offload.l2_len = tx_pkt->l2_len;
715                         tx_offload.l3_len = tx_pkt->l3_len;
716                         tx_offload.l4_len = tx_pkt->l4_len;
717                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
718                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
719                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
720                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
721 #ifdef RTE_LIBRTE_SECURITY
722                         if (use_ipsec) {
723                                 union ixgbe_crypto_tx_desc_md *ipsec_mdata =
724                                         (union ixgbe_crypto_tx_desc_md *)
725                                                         &tx_pkt->udata64;
726                                 tx_offload.sa_idx = ipsec_mdata->sa_idx;
727                                 tx_offload.sec_pad_len = ipsec_mdata->pad_len;
728                         }
729 #endif
730
731                 /* Decide whether a new context must be built or the existing one reused. */
732                         ctx = what_advctx_update(txq, tx_ol_req,
733                                 tx_offload);
734                 /* Only allocate a context descriptor if required */
735                         new_ctx = (ctx == IXGBE_CTX_NUM);
736                         ctx = txq->ctx_curr;
737                 }
738
739                  * Keep track of how many descriptors are used in this loop.
740                  * This will always be the number of segments plus the number of
741                  * context descriptors required to transmit the packet.
742                  * Context descriptors required to transmit the packet
743                  */
744                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
745
746                 if (txp != NULL &&
747                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
748                         /* set RS on the previous packet in the burst */
749                         txp->read.cmd_type_len |=
750                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
751
752                 /*
753                  * The number of descriptors that must be allocated for a
754                  * packet is the number of segments of that packet, plus 1
755                  * Context Descriptor for the hardware offload, if any.
756                  * Determine the last TX descriptor to allocate in the TX ring
757                  * for the packet, starting from the current position (tx_id)
758                  * in the ring.
759                  */
760                 tx_last = (uint16_t) (tx_id + nb_used - 1);
761
762                 /* Circular ring */
763                 if (tx_last >= txq->nb_tx_desc)
764                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
765
766                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
767                            " tx_first=%u tx_last=%u",
768                            (unsigned) txq->port_id,
769                            (unsigned) txq->queue_id,
770                            (unsigned) pkt_len,
771                            (unsigned) tx_id,
772                            (unsigned) tx_last);
773
774                 /*
775                  * Make sure there are enough TX descriptors available to
776                  * transmit the entire packet.
777                  * nb_used better be less than or equal to txq->tx_rs_thresh
778                  */
779                 if (nb_used > txq->nb_tx_free) {
780                         PMD_TX_FREE_LOG(DEBUG,
781                                         "Not enough free TX descriptors "
782                                         "nb_used=%4u nb_free=%4u "
783                                         "(port=%d queue=%d)",
784                                         nb_used, txq->nb_tx_free,
785                                         txq->port_id, txq->queue_id);
786
787                         if (ixgbe_xmit_cleanup(txq) != 0) {
788                                 /* Could not clean any descriptors */
789                                 if (nb_tx == 0)
790                                         return 0;
791                                 goto end_of_tx;
792                         }
793
794                         /* nb_used better be <= txq->tx_rs_thresh */
795                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
796                                 PMD_TX_FREE_LOG(DEBUG,
797                                         "The number of descriptors needed to "
798                                         "transmit the packet exceeds the "
799                                         "RS bit threshold. This will impact "
800                                         "performance. "
801                                         "nb_used=%4u nb_free=%4u "
802                                         "tx_rs_thresh=%4u. "
803                                         "(port=%d queue=%d)",
804                                         nb_used, txq->nb_tx_free,
805                                         txq->tx_rs_thresh,
806                                         txq->port_id, txq->queue_id);
807                                 /*
808                                  * Loop here until there are enough TX
809                                  * descriptors or until the ring cannot be
810                                  * cleaned.
811                                  */
812                                 while (nb_used > txq->nb_tx_free) {
813                                         if (ixgbe_xmit_cleanup(txq) != 0) {
814                                                 /*
815                                                  * Could not clean any
816                                                  * descriptors
817                                                  */
818                                                 if (nb_tx == 0)
819                                                         return 0;
820                                                 goto end_of_tx;
821                                         }
822                                 }
823                         }
824                 }
825
826                 /*
827                  * By now there are enough free TX descriptors to transmit
828                  * the packet.
829                  */
830
831                 /*
832                  * Set common flags of all TX Data Descriptors.
833                  *
834                  * The following bits must be set in all Data Descriptors:
835                  *   - IXGBE_ADVTXD_DTYP_DATA
836                  *   - IXGBE_ADVTXD_DCMD_DEXT
837                  *
838                  * The following bits must be set in the first Data Descriptor
839                  * and are ignored in the other ones:
840                  *   - IXGBE_ADVTXD_DCMD_IFCS
841                  *   - IXGBE_ADVTXD_MAC_1588
842                  *   - IXGBE_ADVTXD_DCMD_VLE
843                  *
844                  * The following bits must only be set in the last Data
845                  * Descriptor:
846                  *   - IXGBE_TXD_CMD_EOP
847                  *
848                  * The following bits can be set in any Data Descriptor, but
849                  * are only set in the last Data Descriptor:
850                  *   - IXGBE_TXD_CMD_RS
851                  */
852                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
853                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
854
855 #ifdef RTE_LIBRTE_IEEE1588
856                 if (ol_flags & PKT_TX_IEEE1588_TMST)
857                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
858 #endif
859
860                 olinfo_status = 0;
861                 if (tx_ol_req) {
862
863                         if (ol_flags & PKT_TX_TCP_SEG) {
864                                 /* when TSO is on, the paylen in the descriptor
865                                  * is not the packet len but the tcp payload len */
866                                 pkt_len -= (tx_offload.l2_len +
867                                         tx_offload.l3_len + tx_offload.l4_len);
868                         }
869
870                         /*
871                          * Setup the TX Advanced Context Descriptor if required
872                          */
873                         if (new_ctx) {
874                                 volatile struct ixgbe_adv_tx_context_desc *
875                                     ctx_txd;
876
877                                 ctx_txd = (volatile struct
878                                     ixgbe_adv_tx_context_desc *)
879                                     &txr[tx_id];
880
881                                 txn = &sw_ring[txe->next_id];
882                                 rte_prefetch0(&txn->mbuf->pool);
883
884                                 if (txe->mbuf != NULL) {
885                                         rte_pktmbuf_free_seg(txe->mbuf);
886                                         txe->mbuf = NULL;
887                                 }
888
889                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
890                                         tx_offload, &tx_pkt->udata64);
891
892                                 txe->last_id = tx_last;
893                                 tx_id = txe->next_id;
894                                 txe = txn;
895                         }
896
897                         /*
898                          * Set up the TX Advanced Data Descriptor.
899                          * This path is taken whether a new context
900                          * descriptor was built or an existing one is reused.
901                          */
902                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
903                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
904                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
905                 }
906
907                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
908 #ifdef RTE_LIBRTE_SECURITY
909                 if (use_ipsec)
910                         olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
911 #endif
912
913                 m_seg = tx_pkt;
914                 do {
915                         txd = &txr[tx_id];
916                         txn = &sw_ring[txe->next_id];
917                         rte_prefetch0(&txn->mbuf->pool);
918
919                         if (txe->mbuf != NULL)
920                                 rte_pktmbuf_free_seg(txe->mbuf);
921                         txe->mbuf = m_seg;
922
923                         /*
924                          * Set up Transmit Data Descriptor.
925                          */
926                         slen = m_seg->data_len;
927                         buf_dma_addr = rte_mbuf_data_iova(m_seg);
928                         txd->read.buffer_addr =
929                                 rte_cpu_to_le_64(buf_dma_addr);
930                         txd->read.cmd_type_len =
931                                 rte_cpu_to_le_32(cmd_type_len | slen);
932                         txd->read.olinfo_status =
933                                 rte_cpu_to_le_32(olinfo_status);
934                         txe->last_id = tx_last;
935                         tx_id = txe->next_id;
936                         txe = txn;
937                         m_seg = m_seg->next;
938                 } while (m_seg != NULL);
939
940                 /*
941                  * The last packet data descriptor needs End Of Packet (EOP)
942                  */
943                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
944                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
945                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
946
947                 /* Set RS bit only on threshold packets' last descriptor */
948                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
949                         PMD_TX_FREE_LOG(DEBUG,
950                                         "Setting RS bit on TXD id="
951                                         "%4u (port=%d queue=%d)",
952                                         tx_last, txq->port_id, txq->queue_id);
953
954                         cmd_type_len |= IXGBE_TXD_CMD_RS;
955
956                         /* Update txq RS bit counters */
957                         txq->nb_tx_used = 0;
958                         txp = NULL;
959                 } else
960                         txp = txd;
961
962                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
963         }
964
965 end_of_tx:
966         /* set RS on last packet in the burst */
967         if (txp != NULL)
968                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
969
970         rte_wmb();
971
972         /*
973          * Set the Transmit Descriptor Tail (TDT)
974          */
975         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
976                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
977                    (unsigned) tx_id, (unsigned) nb_tx);
978         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
979         txq->tx_tail = tx_id;
980
981         return nb_tx;
982 }
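/*
 * Note on RS handling in the loop above: the RS (Report Status) bit is
 * only requested once tx_rs_thresh descriptors have been used, or on the
 * last descriptor of the burst via the deferred txp pointer.  This limits
 * descriptor write-back traffic while still giving ixgbe_xmit_cleanup()
 * completed descriptors to reclaim.
 */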
983
984 /*********************************************************************
985  *
986  *  TX prep functions
987  *
988  **********************************************************************/
989 uint16_t
990 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
991 {
992         int i, ret;
993         uint64_t ol_flags;
994         struct rte_mbuf *m;
995         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
996
997         for (i = 0; i < nb_pkts; i++) {
998                 m = tx_pkts[i];
999                 ol_flags = m->ol_flags;
1000
1001                 /**
1002                  * Check if packet meets requirements for number of segments
1003                  *
1004                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
1005                  *       non-TSO
1006                  */
1007
1008                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
1009                         rte_errno = EINVAL;
1010                         return i;
1011                 }
1012
1013                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
1014                         rte_errno = ENOTSUP;
1015                         return i;
1016                 }
1017
1018 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
1019                 ret = rte_validate_tx_offload(m);
1020                 if (ret != 0) {
1021                         rte_errno = -ret;
1022                         return i;
1023                 }
1024 #endif
1025                 ret = rte_net_intel_cksum_prepare(m);
1026                 if (ret != 0) {
1027                         rte_errno = -ret;
1028                         return i;
1029                 }
1030         }
1031
1032         return i;
1033 }
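/*
 * Illustrative usage sketch (port_id/queue_id and the burst size are
 * placeholders): this callback is reached through rte_eth_tx_prepare(),
 * which applications typically call right before rte_eth_tx_burst() when
 * using TSO or checksum offloads:
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);
 *     if (nb_prep < nb) {
 *             // pkts[nb_prep] failed the checks; inspect rte_errno
 *     }
 *     uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */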
1034
1035 /*********************************************************************
1036  *
1037  *  RX functions
1038  *
1039  **********************************************************************/
1040
1041 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1042 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1043 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1044 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1045 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1046 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1047 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1048 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1049 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1050 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1051 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1052 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1053 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1054 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1055 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1056 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1057 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1058 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1059 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1060 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1061 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1062 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1063 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1064 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1065 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1066 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1067 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1068 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1069 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1070 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1071 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1072 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1073 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1074
1075 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1076 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1077 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1078 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1079 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1080 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1081 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1082 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1083 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1084 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1085 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1086 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1087 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1088 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1089 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1090 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1091 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1092 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1093 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1094 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1095 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1096 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1097 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1098
1099 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1100 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1101 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1102 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1103 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1104 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1105 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1106 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1107 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1108 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1109 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1110 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1111 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1112 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1113 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1114 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1115 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1116 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1117 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1118 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1119 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1120 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1121 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1122
1123 /**
1124  * Use two different tables for normal packets and tunnel packets
1125  * to save space.
1126  */
1127 const uint32_t
1128         ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1129         [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1130         [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1131                 RTE_PTYPE_L3_IPV4,
1132         [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1133                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1134         [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1135                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1136         [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1137                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1138         [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1139                 RTE_PTYPE_L3_IPV4_EXT,
1140         [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1141                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1142         [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1143                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1144         [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1145                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1146         [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1147                 RTE_PTYPE_L3_IPV6,
1148         [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1149                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1150         [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1151                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1152         [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1153                 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1154         [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1155                 RTE_PTYPE_L3_IPV6_EXT,
1156         [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1157                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1158         [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1159                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1160         [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1161                 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1162         [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1163                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1164                 RTE_PTYPE_INNER_L3_IPV6,
1165         [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1166                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1167                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1168         [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1169                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1170                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1171         [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1172                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1173                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1174         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1175                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1176                 RTE_PTYPE_INNER_L3_IPV6,
1177         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1178                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1179                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1180         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1181                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1183         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1184                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1185                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1186         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1187                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1188                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1189         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1190                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1191                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1192         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1193                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1194                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1195         [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1196                 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1197                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1198         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1199                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1200                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1201         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1202                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1203                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1204         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1205                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1206                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1207         [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1208                 RTE_PTYPE_L2_ETHER |
1209                 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1210                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1211 };
1212
1213 const uint32_t
1214         ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1215         [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1216                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1217                 RTE_PTYPE_INNER_L2_ETHER,
1218         [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1219                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1220                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1221         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1222                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1223                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1224         [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1225                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1227         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1228                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1230         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1231                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1232                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1233         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1234                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1236         [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1237                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1238                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1239                 RTE_PTYPE_INNER_L4_TCP,
1240         [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1241                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1243                 RTE_PTYPE_INNER_L4_TCP,
1244         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1245                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1246                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1247         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1248                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1250                 RTE_PTYPE_INNER_L4_TCP,
1251         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1252                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1253                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1254                 RTE_PTYPE_INNER_L3_IPV4,
1255         [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1256                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1257                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1258                 RTE_PTYPE_INNER_L4_UDP,
1259         [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1260                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1262                 RTE_PTYPE_INNER_L4_UDP,
1263         [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1264                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1266                 RTE_PTYPE_INNER_L4_SCTP,
1267         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1268                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1270         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1271                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1272                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1273                 RTE_PTYPE_INNER_L4_UDP,
1274         [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1275                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1276                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1277                 RTE_PTYPE_INNER_L4_SCTP,
1278         [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1279                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1280                 RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1281                 RTE_PTYPE_INNER_L3_IPV4,
1282         [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1283                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1284                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1285                 RTE_PTYPE_INNER_L4_SCTP,
1286         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1287                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1288                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1289                 RTE_PTYPE_INNER_L4_SCTP,
1290         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1291                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1292                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1293                 RTE_PTYPE_INNER_L4_TCP,
1294         [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1295                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1296                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1297                 RTE_PTYPE_INNER_L4_UDP,
1298
1299         [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1300                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1302         [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1303                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                 RTE_PTYPE_INNER_L3_IPV4,
1306         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1307                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                 RTE_PTYPE_INNER_L3_IPV4_EXT,
1310         [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1311                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                 RTE_PTYPE_INNER_L3_IPV6,
1314         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1315                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                 RTE_PTYPE_INNER_L3_IPV4,
1318         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1319                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                 RTE_PTYPE_INNER_L3_IPV6_EXT,
1322         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1323                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                 RTE_PTYPE_INNER_L3_IPV4,
1326         [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1327                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1330         [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1331                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1334         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1335                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                 RTE_PTYPE_INNER_L3_IPV4,
1338         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1339                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1342         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1343                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1344                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1345                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1346         [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1347                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1350         [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1351                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1354         [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1355                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1358         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1359                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                 RTE_PTYPE_INNER_L3_IPV4,
1362         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1363                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1366         [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1367                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1370         [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1371                 RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1372                 RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1373                 RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1374         [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1375                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1376                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1377                 RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1378         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1379                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1380                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1381                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1382         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1383                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1384                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1385                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1386         [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1387                 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1388                 RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1389                 RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1390 };
1391
1392 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change is made here. */
1393 static inline uint32_t
1394 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1395 {
1396
1397         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1398                 return RTE_PTYPE_UNKNOWN;
1399
1400         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1401
1402         /* For tunnel packet */
1403         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1404                 /* Remove the tunnel bit to save space. */
1405                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1406                 return ptype_table_tn[pkt_info];
1407         }
1408
1409         /**
1410          * For x550, if it's not tunnel,
1411          * tunnel type bit should be set to 0.
1412          * Reuse 82599's mask.
1413          */
1414         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1415
1416         return ptype_table[pkt_info];
1417 }
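
/*
 * Usage sketch (illustrative, not part of the driver): the packet type
 * produced by the lookup above ends up in mbuf->packet_type and can be
 * tested by an application with the generic RTE_PTYPE_* masks; the
 * handler names below are hypothetical.
 *
 *     uint32_t ptype = mb->packet_type;
 *
 *     if ((ptype & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRE)
 *             handle_nvgre(mb);
 *     if ((ptype & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP)
 *             handle_inner_tcp(mb);
 */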
1418
1419 static inline uint64_t
1420 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1421 {
1422         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1423                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1424                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1425                 PKT_RX_RSS_HASH, 0, 0, 0,
1426                 0, 0, 0,  PKT_RX_FDIR,
1427         };
1428 #ifdef RTE_LIBRTE_IEEE1588
1429         static uint64_t ip_pkt_etqf_map[8] = {
1430                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1431                 0, 0, 0, 0,
1432         };
1433
1434         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1435                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1436                                 ip_rss_types_map[pkt_info & 0XF];
1437         else
1438                 return ip_rss_types_map[pkt_info & 0XF];
1439 #else
1440         return ip_rss_types_map[pkt_info & 0XF];
1441 #endif
1442 }
1443
1444 static inline uint64_t
1445 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1446 {
1447         uint64_t pkt_flags;
1448
1449         /*
1450          * Check only whether a VLAN tag is present.
1451          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1452          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1453          */
1454         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1455
1456 #ifdef RTE_LIBRTE_IEEE1588
1457         if (rx_status & IXGBE_RXD_STAT_TMST)
1458                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1459 #endif
1460         return pkt_flags;
1461 }
1462
1463 static inline uint64_t
1464 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1465 {
1466         uint64_t pkt_flags;
1467
1468         /*
1469          * Bit 31: IPE, IPv4 checksum error
1470          * Bit 30: L4I, L4 integrity error
1471          */
1472         static uint64_t error_to_pkt_flags_map[4] = {
1473                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1474                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1475                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1476                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1477         };
1478         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1479                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1480
1481         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1482             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1483                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1484         }
1485
1486 #ifdef RTE_LIBRTE_SECURITY
1487         if (rx_status & IXGBE_RXD_STAT_SECP) {
1488                 pkt_flags |= PKT_RX_SEC_OFFLOAD;
1489                 if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1490                         pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1491         }
1492 #endif
1493
1494         return pkt_flags;
1495 }
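
/*
 * Worked example for the mapping above (illustrative): a write-back
 * status word with IPE (bit 31) clear and L4I (bit 30) set yields
 * index ((rx_status >> IXGBE_RXDADV_ERR_CKSUM_BIT) &
 * IXGBE_RXDADV_ERR_CKSUM_MSK) == 1, i.e. PKT_RX_IP_CKSUM_GOOD |
 * PKT_RX_L4_CKSUM_BAD: the IPv4 header checksum was fine but the
 * TCP/UDP checksum was not.
 */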
1496
1497 /*
1498  * LOOK_AHEAD defines how many desc statuses to check beyond the
1499  * current descriptor.
1500  * It must be a compile-time #define for optimal performance.
1501  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1502  * function only works with LOOK_AHEAD=8.
1503  */
1504 #define LOOK_AHEAD 8
1505 #if (LOOK_AHEAD != 8)
1506 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1507 #endif
1508 static inline int
1509 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1510 {
1511         volatile union ixgbe_adv_rx_desc *rxdp;
1512         struct ixgbe_rx_entry *rxep;
1513         struct rte_mbuf *mb;
1514         uint16_t pkt_len;
1515         uint64_t pkt_flags;
1516         int nb_dd;
1517         uint32_t s[LOOK_AHEAD];
1518         uint32_t pkt_info[LOOK_AHEAD];
1519         int i, j, nb_rx = 0;
1520         uint32_t status;
1521         uint64_t vlan_flags = rxq->vlan_flags;
1522
1523         /* get references to current descriptor and S/W ring entry */
1524         rxdp = &rxq->rx_ring[rxq->rx_tail];
1525         rxep = &rxq->sw_ring[rxq->rx_tail];
1526
1527         status = rxdp->wb.upper.status_error;
1528         /* check to make sure there is at least 1 packet to receive */
1529         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1530                 return 0;
1531
1532         /*
1533          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1534          * reference packets that are ready to be received.
1535          */
1536         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1537              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1538                 /* Read desc statuses backwards to avoid race condition */
1539                 for (j = 0; j < LOOK_AHEAD; j++)
1540                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1541
1542                 rte_smp_rmb();
1543
1544                 /* Compute how many status bits were set */
1545                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1546                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1547                         ;
1548
1549                 for (j = 0; j < nb_dd; j++)
1550                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1551                                                        lo_dword.data);
1552
1553                 nb_rx += nb_dd;
1554
1555                 /* Translate descriptor info to mbuf format */
1556                 for (j = 0; j < nb_dd; ++j) {
1557                         mb = rxep[j].mbuf;
1558                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1559                                   rxq->crc_len;
1560                         mb->data_len = pkt_len;
1561                         mb->pkt_len = pkt_len;
1562                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1563
1564                         /* convert descriptor fields to rte mbuf flags */
1565                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1566                                 vlan_flags);
1567                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1568                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1569                                         ((uint16_t)pkt_info[j]);
1570                         mb->ol_flags = pkt_flags;
1571                         mb->packet_type =
1572                                 ixgbe_rxd_pkt_info_to_pkt_type
1573                                         (pkt_info[j], rxq->pkt_type_mask);
1574
1575                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1576                                 mb->hash.rss = rte_le_to_cpu_32(
1577                                     rxdp[j].wb.lower.hi_dword.rss);
1578                         else if (pkt_flags & PKT_RX_FDIR) {
1579                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1580                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1581                                     IXGBE_ATR_HASH_MASK;
1582                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1583                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1584                         }
1585                 }
1586
1587                 /* Move mbuf pointers from the S/W ring to the stage */
1588                 for (j = 0; j < LOOK_AHEAD; ++j) {
1589                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1590                 }
1591
1592                 /* stop if all requested packets could not be received */
1593                 if (nb_dd != LOOK_AHEAD)
1594                         break;
1595         }
1596
1597         /* clear software ring entries so we can cleanup correctly */
1598         for (i = 0; i < nb_rx; ++i) {
1599                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1600         }
1601
1602
1603         return nb_rx;
1604 }
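
/*
 * Worked example for the scan above (illustrative): with LOOK_AHEAD=8,
 * if only the first 5 descriptors of a group have their DD bit set,
 * nb_dd stops at 5, those 5 mbufs are moved to the stage array and the
 * outer loop breaks because nb_dd != LOOK_AHEAD; the remaining
 * descriptors are re-examined on the next call, once the NIC has had a
 * chance to write them back.
 */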
1605
1606 static inline int
1607 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1608 {
1609         volatile union ixgbe_adv_rx_desc *rxdp;
1610         struct ixgbe_rx_entry *rxep;
1611         struct rte_mbuf *mb;
1612         uint16_t alloc_idx;
1613         __le64 dma_addr;
1614         int diag, i;
1615
1616         /* allocate buffers in bulk directly into the S/W ring */
1617         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1618         rxep = &rxq->sw_ring[alloc_idx];
1619         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1620                                     rxq->rx_free_thresh);
1621         if (unlikely(diag != 0))
1622                 return -ENOMEM;
1623
1624         rxdp = &rxq->rx_ring[alloc_idx];
1625         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1626                 /* populate the static rte mbuf fields */
1627                 mb = rxep[i].mbuf;
1628                 if (reset_mbuf) {
1629                         mb->port = rxq->port_id;
1630                 }
1631
1632                 rte_mbuf_refcnt_set(mb, 1);
1633                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1634
1635                 /* populate the descriptors */
1636                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1637                 rxdp[i].read.hdr_addr = 0;
1638                 rxdp[i].read.pkt_addr = dma_addr;
1639         }
1640
1641         /* update state of internal queue structure */
1642         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1643         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1644                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1645
1646         /* no errors */
1647         return 0;
1648 }
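
/*
 * Worked example for the replenish logic above (illustrative numbers):
 * with nb_rx_desc = 128 and rx_free_thresh = 32, buffers are taken from
 * the mempool in groups of 32 starting at rx_free_trigger - 31; after
 * each refill rx_free_trigger advances by 32 and wraps back to
 * rx_free_thresh - 1 (i.e. 31) once it would pass the end of the ring.
 */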
1649
1650 static inline uint16_t
1651 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1652                          uint16_t nb_pkts)
1653 {
1654         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1655         int i;
1656
1657         /* how many packets are ready to return? */
1658         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1659
1660         /* copy mbuf pointers to the application's packet list */
1661         for (i = 0; i < nb_pkts; ++i)
1662                 rx_pkts[i] = stage[i];
1663
1664         /* update internal queue state */
1665         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1666         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1667
1668         return nb_pkts;
1669 }
1670
1671 static inline uint16_t
1672 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1673              uint16_t nb_pkts)
1674 {
1675         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1676         uint16_t nb_rx = 0;
1677
1678         /* Any previously recv'd pkts will be returned from the Rx stage */
1679         if (rxq->rx_nb_avail)
1680                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1681
1682         /* Scan the H/W ring for packets to receive */
1683         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1684
1685         /* update internal queue state */
1686         rxq->rx_next_avail = 0;
1687         rxq->rx_nb_avail = nb_rx;
1688         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1689
1690         /* if required, allocate new buffers to replenish descriptors */
1691         if (rxq->rx_tail > rxq->rx_free_trigger) {
1692                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1693
1694                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1695                         int i, j;
1696
1697                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1698                                    "queue_id=%u", (unsigned) rxq->port_id,
1699                                    (unsigned) rxq->queue_id);
1700
1701                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1702                                 rxq->rx_free_thresh;
1703
1704                         /*
1705                          * Need to rewind any previous receives if we cannot
1706                          * allocate new buffers to replenish the old ones.
1707                          */
1708                         rxq->rx_nb_avail = 0;
1709                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1710                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1711                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1712
1713                         return 0;
1714                 }
1715
1716                 /* update tail pointer */
1717                 rte_wmb();
1718                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1719                                             cur_free_trigger);
1720         }
1721
1722         if (rxq->rx_tail >= rxq->nb_rx_desc)
1723                 rxq->rx_tail = 0;
1724
1725         /* received any packets this loop? */
1726         if (rxq->rx_nb_avail)
1727                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1728
1729         return 0;
1730 }
1731
1732 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1733 uint16_t
1734 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1735                            uint16_t nb_pkts)
1736 {
1737         uint16_t nb_rx;
1738
1739         if (unlikely(nb_pkts == 0))
1740                 return 0;
1741
1742         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1743                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1744
1745         /* request is relatively large, chunk it up */
1746         nb_rx = 0;
1747         while (nb_pkts) {
1748                 uint16_t ret, n;
1749
1750                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1751                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1752                 nb_rx = (uint16_t)(nb_rx + ret);
1753                 nb_pkts = (uint16_t)(nb_pkts - ret);
1754                 if (ret < n)
1755                         break;
1756         }
1757
1758         return nb_rx;
1759 }
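
/*
 * Worked example for the chunking above (illustrative, assuming the
 * usual RTE_PMD_IXGBE_RX_MAX_BURST of 32): a request for 100 packets is
 * split into bursts of 32, 32, 32 and 4; the loop stops early as soon
 * as one burst returns fewer packets than it asked for, so at most one
 * partial burst is issued per call.
 */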
1760
1761 uint16_t
1762 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1763                 uint16_t nb_pkts)
1764 {
1765         struct ixgbe_rx_queue *rxq;
1766         volatile union ixgbe_adv_rx_desc *rx_ring;
1767         volatile union ixgbe_adv_rx_desc *rxdp;
1768         struct ixgbe_rx_entry *sw_ring;
1769         struct ixgbe_rx_entry *rxe;
1770         struct rte_mbuf *rxm;
1771         struct rte_mbuf *nmb;
1772         union ixgbe_adv_rx_desc rxd;
1773         uint64_t dma_addr;
1774         uint32_t staterr;
1775         uint32_t pkt_info;
1776         uint16_t pkt_len;
1777         uint16_t rx_id;
1778         uint16_t nb_rx;
1779         uint16_t nb_hold;
1780         uint64_t pkt_flags;
1781         uint64_t vlan_flags;
1782
1783         nb_rx = 0;
1784         nb_hold = 0;
1785         rxq = rx_queue;
1786         rx_id = rxq->rx_tail;
1787         rx_ring = rxq->rx_ring;
1788         sw_ring = rxq->sw_ring;
1789         vlan_flags = rxq->vlan_flags;
1790         while (nb_rx < nb_pkts) {
1791                 /*
1792                  * The order of operations here is important as the DD status
1793                  * bit must not be read after any other descriptor fields.
1794                  * rx_ring and rxdp are pointing to volatile data so the order
1795                  * of accesses cannot be reordered by the compiler. If they were
1796                  * not volatile, they could be reordered which could lead to
1797                  * using invalid descriptor fields when read from rxd.
1798                  */
1799                 rxdp = &rx_ring[rx_id];
1800                 staterr = rxdp->wb.upper.status_error;
1801                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1802                         break;
1803                 rxd = *rxdp;
1804
1805                 /*
1806                  * End of packet.
1807                  *
1808                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1809                  * is likely to be invalid and to be dropped by the various
1810                  * validation checks performed by the network stack.
1811                  *
1812                  * Allocate a new mbuf to replenish the RX ring descriptor.
1813                  * If the allocation fails:
1814                  *    - arrange for that RX descriptor to be the first one
1815                  *      being parsed the next time the receive function is
1816                  *      invoked [on the same queue].
1817                  *
1818                  *    - Stop parsing the RX ring and return immediately.
1819                  *
1820                  * This policy does not drop the packet received in the RX
1821                  * descriptor for which the allocation of a new mbuf failed.
1822                  * Thus, it allows that packet to be retrieved later once
1823                  * mbufs have been freed in the meantime.
1824                  * As a side effect, holding RX descriptors instead of
1825                  * systematically giving them back to the NIC may lead to
1826                  * RX ring exhaustion situations.
1827                  * However, the NIC can gracefully prevent such situations
1828                  * from happening by sending "back-pressure" flow control
1829                  * frames to its peer(s).
1830                  */
1831                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1832                            "ext_err_stat=0x%08x pkt_len=%u",
1833                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1834                            (unsigned) rx_id, (unsigned) staterr,
1835                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1836
1837                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1838                 if (nmb == NULL) {
1839                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1840                                    "queue_id=%u", (unsigned) rxq->port_id,
1841                                    (unsigned) rxq->queue_id);
1842                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1843                         break;
1844                 }
1845
1846                 nb_hold++;
1847                 rxe = &sw_ring[rx_id];
1848                 rx_id++;
1849                 if (rx_id == rxq->nb_rx_desc)
1850                         rx_id = 0;
1851
1852                 /* Prefetch next mbuf while processing current one. */
1853                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1854
1855                 /*
1856                  * When next RX descriptor is on a cache-line boundary,
1857                  * prefetch the next 4 RX descriptors and the next 8 pointers
1858                  * to mbufs.
1859                  */
1860                 if ((rx_id & 0x3) == 0) {
1861                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1862                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1863                 }
1864
1865                 rxm = rxe->mbuf;
1866                 rxe->mbuf = nmb;
1867                 dma_addr =
1868                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1869                 rxdp->read.hdr_addr = 0;
1870                 rxdp->read.pkt_addr = dma_addr;
1871
1872                 /*
1873                  * Initialize the returned mbuf.
1874                  * 1) setup generic mbuf fields:
1875                  *    - number of segments,
1876                  *    - next segment,
1877                  *    - packet length,
1878                  *    - RX port identifier.
1879                  * 2) integrate hardware offload data, if any:
1880                  *    - RSS flag & hash,
1881                  *    - IP checksum flag,
1882                  *    - VLAN TCI, if any,
1883                  *    - error flags.
1884                  */
1885                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1886                                       rxq->crc_len);
1887                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1888                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1889                 rxm->nb_segs = 1;
1890                 rxm->next = NULL;
1891                 rxm->pkt_len = pkt_len;
1892                 rxm->data_len = pkt_len;
1893                 rxm->port = rxq->port_id;
1894
1895                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1896                 /* Only valid if PKT_RX_VLAN set in pkt_flags */
1897                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1898
1899                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1900                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1901                 pkt_flags = pkt_flags |
1902                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1903                 rxm->ol_flags = pkt_flags;
1904                 rxm->packet_type =
1905                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1906                                                        rxq->pkt_type_mask);
1907
1908                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1909                         rxm->hash.rss = rte_le_to_cpu_32(
1910                                                 rxd.wb.lower.hi_dword.rss);
1911                 else if (pkt_flags & PKT_RX_FDIR) {
1912                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1913                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1914                                         IXGBE_ATR_HASH_MASK;
1915                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1916                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1917                 }
1918                 /*
1919                  * Store the mbuf address into the next entry of the array
1920                  * of returned packets.
1921                  */
1922                 rx_pkts[nb_rx++] = rxm;
1923         }
1924         rxq->rx_tail = rx_id;
1925
1926         /*
1927          * If the number of free RX descriptors is greater than the RX free
1928          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1929          * register.
1930          * Update the RDT with the value of the last processed RX descriptor
1931          * minus 1, to guarantee that the RDT register is never equal to the
1932          * RDH register, which creates a "full" ring situation from the
1933          * hardware point of view...
1934          */
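        /*
         * Worked example (illustrative): with rx_free_thresh = 32 the
         * tail register is only written once at least 33 descriptors
         * have been processed since the last update; writing rx_id - 1
         * (wrapping to nb_rx_desc - 1 when rx_id is 0) guarantees that
         * RDT never equals RDH, so the ring never looks "full" to the
         * hardware.
         */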
1935         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1936         if (nb_hold > rxq->rx_free_thresh) {
1937                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1938                            "nb_hold=%u nb_rx=%u",
1939                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1940                            (unsigned) rx_id, (unsigned) nb_hold,
1941                            (unsigned) nb_rx);
1942                 rx_id = (uint16_t) ((rx_id == 0) ?
1943                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1944                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1945                 nb_hold = 0;
1946         }
1947         rxq->nb_rx_hold = nb_hold;
1948         return nb_rx;
1949 }
1950
1951 /**
1952  * Detect an RSC descriptor.
1953  */
1954 static inline uint32_t
1955 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1956 {
1957         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1958                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1959 }
1960
1961 /**
1962  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1963  *
1964  * Fill the following info in the HEAD buffer of the Rx cluster:
1965  *    - RX port identifier
1966  *    - hardware offload data, if any:
1967  *      - RSS flag & hash
1968  *      - IP checksum flag
1969  *      - VLAN TCI, if any
1970  *      - error flags
1971  * @head HEAD of the packet cluster
1972  * @desc HW descriptor to get data from
1973  * @rxq Pointer to the Rx queue
1974  */
1975 static inline void
1976 ixgbe_fill_cluster_head_buf(
1977         struct rte_mbuf *head,
1978         union ixgbe_adv_rx_desc *desc,
1979         struct ixgbe_rx_queue *rxq,
1980         uint32_t staterr)
1981 {
1982         uint32_t pkt_info;
1983         uint64_t pkt_flags;
1984
1985         head->port = rxq->port_id;
1986
1987         /* The vlan_tci field is only valid when PKT_RX_VLAN is
1988          * set in the pkt_flags field.
1989          */
1990         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1991         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1992         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1993         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1994         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1995         head->ol_flags = pkt_flags;
1996         head->packet_type =
1997                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1998
1999         if (likely(pkt_flags & PKT_RX_RSS_HASH))
2000                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2001         else if (pkt_flags & PKT_RX_FDIR) {
2002                 head->hash.fdir.hash =
2003                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2004                                                           & IXGBE_ATR_HASH_MASK;
2005                 head->hash.fdir.id =
2006                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2007         }
2008 }
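
/*
 * Note (descriptive, not from the original source): the helper above
 * only fills metadata taken from the EOP descriptor of the cluster
 * (port, VLAN TCI, ol_flags, packet_type and the RSS/FDIR hash);
 * pkt_len, data_len, nb_segs and the segment chain itself are
 * maintained by the caller as segments arrive.
 */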
2009
2010 /**
2011  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2012  *
2013  * @rx_queue Rx queue handle
2014  * @rx_pkts table of received packets
2015  * @nb_pkts size of rx_pkts table
2016  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2017  *
2018  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
2019  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2020  *
2021  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2022  * 1) When non-EOP RSC completion arrives:
2023  *    a) Update the HEAD of the current RSC aggregation cluster with the new
2024  *       segment's data length.
2025  *    b) Set the "next" pointer of the current segment to point to the segment
2026  *       at the NEXTP index.
2027  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2028  *       in the sw_rsc_ring.
2029  * 2) When EOP arrives we just update the cluster's total length and offload
2030  *    flags and deliver the cluster up to the upper layers. In our case - put it
2031  *    in the rx_pkts table.
2032  *
2033  * Returns the number of received packets/clusters (according to the "bulk
2034  * receive" interface).
2035  */
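/*
 * Chaining sketch for the non-EOP case described above (illustrative,
 * the indices are examples only): if descriptor 5 completes without EOP
 * and reports NEXTP = 9, then sw_ring[5].mbuf->next is pointed at
 * sw_ring[9].mbuf and the cluster HEAD is parked in sw_sc_ring[9].fbuf,
 * so that when descriptor 9 later completes the driver keeps appending
 * to the same packet instead of starting a new one.
 */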
2036 static inline uint16_t
2037 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2038                     bool bulk_alloc)
2039 {
2040         struct ixgbe_rx_queue *rxq = rx_queue;
2041         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2042         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2043         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2044         uint16_t rx_id = rxq->rx_tail;
2045         uint16_t nb_rx = 0;
2046         uint16_t nb_hold = rxq->nb_rx_hold;
2047         uint16_t prev_id = rxq->rx_tail;
2048
2049         while (nb_rx < nb_pkts) {
2050                 bool eop;
2051                 struct ixgbe_rx_entry *rxe;
2052                 struct ixgbe_scattered_rx_entry *sc_entry;
2053                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2054                 struct ixgbe_rx_entry *next_rxe = NULL;
2055                 struct rte_mbuf *first_seg;
2056                 struct rte_mbuf *rxm;
2057                 struct rte_mbuf *nmb;
2058                 union ixgbe_adv_rx_desc rxd;
2059                 uint16_t data_len;
2060                 uint16_t next_id;
2061                 volatile union ixgbe_adv_rx_desc *rxdp;
2062                 uint32_t staterr;
2063
2064 next_desc:
2065                 /*
2066                  * The code in this whole file uses the volatile pointer to
2067                  * ensure the read ordering of the status and the rest of the
2068                  * descriptor fields (on the compiler level only!!!). This is so
2069                  * UGLY - why not just use the compiler barrier instead? DPDK
2070                  * even has the rte_compiler_barrier() for that.
2071                  *
2072                  * But most importantly this is just wrong because this doesn't
2073                  * ensure memory ordering in a general case at all. For
2074                  * instance, DPDK is supposed to work on Power CPUs where
2075                  * compiler barrier may just not be enough!
2076                  *
2077                  * I tried to write only this function properly to have a
2078                  * starting point (as a part of an LRO/RSC series) but the
2079                  * compiler cursed at me when I tried to cast away the
2080                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2081                  * keeping it the way it is for now.
2082                  *
2083                  * The code in this file is broken in so many other places and
2084                  * will just not work on a big-endian CPU anyway; therefore the
2085                  * lines below will have to be revisited together with the rest
2086                  * of the ixgbe PMD.
2087                  *
2088                  * TODO:
2089                  *    - Get rid of "volatile" crap and let the compiler do its
2090                  *      job.
2091                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2092                  *      memory ordering below.
2093                  */
2094                 rxdp = &rx_ring[rx_id];
2095                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2096
2097                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2098                         break;
2099
2100                 rxd = *rxdp;
2101
2102                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2103                                   "staterr=0x%x data_len=%u",
2104                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2105                            rte_le_to_cpu_16(rxd.wb.upper.length));
2106
2107                 if (!bulk_alloc) {
2108                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2109                         if (nmb == NULL) {
2110                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2111                                                   "port_id=%u queue_id=%u",
2112                                            rxq->port_id, rxq->queue_id);
2113
2114                                 rte_eth_devices[rxq->port_id].data->
2115                                                         rx_mbuf_alloc_failed++;
2116                                 break;
2117                         }
2118                 } else if (nb_hold > rxq->rx_free_thresh) {
2119                         uint16_t next_rdt = rxq->rx_free_trigger;
2120
2121                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2122                                 rte_wmb();
2123                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2124                                                             next_rdt);
2125                                 nb_hold -= rxq->rx_free_thresh;
2126                         } else {
2127                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2128                                                   "port_id=%u queue_id=%u",
2129                                            rxq->port_id, rxq->queue_id);
2130
2131                                 rte_eth_devices[rxq->port_id].data->
2132                                                         rx_mbuf_alloc_failed++;
2133                                 break;
2134                         }
2135                 }
2136
2137                 nb_hold++;
2138                 rxe = &sw_ring[rx_id];
2139                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2140
2141                 next_id = rx_id + 1;
2142                 if (next_id == rxq->nb_rx_desc)
2143                         next_id = 0;
2144
2145                 /* Prefetch next mbuf while processing current one. */
2146                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2147
2148                 /*
2149                  * When next RX descriptor is on a cache-line boundary,
2150                  * prefetch the next 4 RX descriptors and the next 4 pointers
2151                  * to mbufs.
2152                  */
2153                 if ((next_id & 0x3) == 0) {
2154                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2155                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2156                 }
2157
2158                 rxm = rxe->mbuf;
2159
2160                 if (!bulk_alloc) {
2161                         __le64 dma =
2162                           rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2163                         /*
2164                          * Update RX descriptor with the physical address of the
2165                          * new data buffer of the new allocated mbuf.
2166                          */
2167                         rxe->mbuf = nmb;
2168
2169                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2170                         rxdp->read.hdr_addr = 0;
2171                         rxdp->read.pkt_addr = dma;
2172                 } else
2173                         rxe->mbuf = NULL;
2174
2175                 /*
2176                  * Set the data length of the mbuf.
2177                  */
2178                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2179                 rxm->data_len = data_len;
2180
2181                 if (!eop) {
2182                         uint16_t nextp_id;
2183                         /*
2184                          * Get next descriptor index:
2185                          *  - For RSC it's in the NEXTP field.
2186                          *  - For a scattered packet - it's just a following
2187                          *    descriptor.
2188                          */
2189                         if (ixgbe_rsc_count(&rxd))
2190                                 nextp_id =
2191                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2192                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2193                         else
2194                                 nextp_id = next_id;
2195
2196                         next_sc_entry = &sw_sc_ring[nextp_id];
2197                         next_rxe = &sw_ring[nextp_id];
2198                         rte_ixgbe_prefetch(next_rxe);
2199                 }
2200
2201                 sc_entry = &sw_sc_ring[rx_id];
2202                 first_seg = sc_entry->fbuf;
2203                 sc_entry->fbuf = NULL;
2204
2205                 /*
2206                  * If this is the first buffer of the received packet,
2207                  * set the pointer to the first mbuf of the packet and
2208                  * initialize its context.
2209                  * Otherwise, update the total length and the number of segments
2210                  * of the current scattered packet, and update the pointer to
2211                  * the last mbuf of the current packet.
2212                  */
2213                 if (first_seg == NULL) {
2214                         first_seg = rxm;
2215                         first_seg->pkt_len = data_len;
2216                         first_seg->nb_segs = 1;
2217                 } else {
2218                         first_seg->pkt_len += data_len;
2219                         first_seg->nb_segs++;
2220                 }
2221
2222                 prev_id = rx_id;
2223                 rx_id = next_id;
2224
2225                 /*
2226                  * If this is not the last buffer of the received packet, update
2227                  * the pointer to the first mbuf at the NEXTP entry in the
2228                  * sw_sc_ring and continue to parse the RX ring.
2229                  */
2230                 if (!eop && next_rxe) {
2231                         rxm->next = next_rxe->mbuf;
2232                         next_sc_entry->fbuf = first_seg;
2233                         goto next_desc;
2234                 }
2235
2236                 /* Initialize the first mbuf of the returned packet */
2237                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2238
2239                 /*
2240                  * Deal with the case when HW CRC strip is disabled.
2241                  * That can't happen when LRO is enabled, but still could
2242                  * happen for scattered RX mode.
2243                  */
2244                 first_seg->pkt_len -= rxq->crc_len;
2245                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2246                         struct rte_mbuf *lp;
2247
2248                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2249                                 ;
2250
2251                         first_seg->nb_segs--;
2252                         lp->data_len -= rxq->crc_len - rxm->data_len;
2253                         lp->next = NULL;
2254                         rte_pktmbuf_free_seg(rxm);
2255                 } else
2256                         rxm->data_len -= rxq->crc_len;
2257
2258                 /* Prefetch data of first segment, if configured to do so. */
2259                 rte_packet_prefetch((char *)first_seg->buf_addr +
2260                         first_seg->data_off);
2261
2262                 /*
2263                  * Store the mbuf address into the next entry of the array
2264                  * of returned packets.
2265                  */
2266                 rx_pkts[nb_rx++] = first_seg;
2267         }
2268
2269         /*
2270          * Record index of the next RX descriptor to probe.
2271          */
2272         rxq->rx_tail = rx_id;
2273
2274         /*
2275          * If the number of free RX descriptors is greater than the RX free
2276          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2277          * register.
2278          * Update the RDT with the value of the last processed RX descriptor
2279          * minus 1, to guarantee that the RDT register is never equal to the
2280          * RDH register, which creates a "full" ring situation from the
2281          * hardware point of view...
2282          */
2283         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2284                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2285                            "nb_hold=%u nb_rx=%u",
2286                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2287
2288                 rte_wmb();
2289                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2290                 nb_hold = 0;
2291         }
2292
2293         rxq->nb_rx_hold = nb_hold;
2294         return nb_rx;
2295 }
2296
2297 uint16_t
2298 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2299                                  uint16_t nb_pkts)
2300 {
2301         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2302 }
2303
2304 uint16_t
2305 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2306                                uint16_t nb_pkts)
2307 {
2308         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2309 }
2310
2311 /*********************************************************************
2312  *
2313  *  Queue management functions
2314  *
2315  **********************************************************************/
2316
2317 static void __attribute__((cold))
2318 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2319 {
2320         unsigned i;
2321
2322         if (txq->sw_ring != NULL) {
2323                 for (i = 0; i < txq->nb_tx_desc; i++) {
2324                         if (txq->sw_ring[i].mbuf != NULL) {
2325                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2326                                 txq->sw_ring[i].mbuf = NULL;
2327                         }
2328                 }
2329         }
2330 }
2331
2332 static void __attribute__((cold))
2333 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2334 {
2335         if (txq != NULL &&
2336             txq->sw_ring != NULL)
2337                 rte_free(txq->sw_ring);
2338 }
2339
2340 static void __attribute__((cold))
2341 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2342 {
2343         if (txq != NULL && txq->ops != NULL) {
2344                 txq->ops->release_mbufs(txq);
2345                 txq->ops->free_swring(txq);
2346                 rte_free(txq);
2347         }
2348 }
2349
2350 void __attribute__((cold))
2351 ixgbe_dev_tx_queue_release(void *txq)
2352 {
2353         ixgbe_tx_queue_release(txq);
2354 }
2355
2356 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2357 static void __attribute__((cold))
2358 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2359 {
2360         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2361         struct ixgbe_tx_entry *txe = txq->sw_ring;
2362         uint16_t prev, i;
2363
2364         /* Zero out HW ring memory */
2365         for (i = 0; i < txq->nb_tx_desc; i++) {
2366                 txq->tx_ring[i] = zeroed_desc;
2367         }
2368
2369         /* Initialize SW ring entries */
2370         prev = (uint16_t) (txq->nb_tx_desc - 1);
2371         for (i = 0; i < txq->nb_tx_desc; i++) {
2372                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2373
2374                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2375                 txe[i].mbuf = NULL;
2376                 txe[i].last_id = i;
2377                 txe[prev].next_id = i;
2378                 prev = i;
2379         }
2380
2381         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2382         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2383
2384         txq->tx_tail = 0;
2385         txq->nb_tx_used = 0;
2386         /*
2387          * Always allow 1 descriptor to be un-allocated to avoid
2388          * a H/W race condition
2389          */
2390         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2391         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2392         txq->ctx_curr = 0;
2393         memset((void *)&txq->ctx_cache, 0,
2394                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2395 }
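
/*
 * Worked example for the SW ring initialization above (illustrative):
 * with nb_tx_desc = 4 the loop produces next_id links 3->0, 0->1, 1->2
 * and 2->3, i.e. a circular list; every descriptor starts with its DD
 * bit set so the whole ring is initially considered "done", and one
 * descriptor is kept unallocated (nb_tx_free = nb_tx_desc - 1) to avoid
 * the H/W race mentioned above.
 */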
2396
2397 static const struct ixgbe_txq_ops def_txq_ops = {
2398         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2399         .free_swring = ixgbe_tx_free_swring,
2400         .reset = ixgbe_reset_tx_queue,
2401 };
2402
2403 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2404  * the queue parameters. Used in tx_queue_setup by the primary process and
2405  * then in dev_init by a secondary process when attaching to an existing ethdev.
2406  */
2407 void __attribute__((cold))
2408 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2409 {
2410         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2411         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) &&
2412 #ifdef RTE_LIBRTE_SECURITY
2413                         !(txq->using_ipsec) &&
2414 #endif
2415                         (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2416                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2417                 dev->tx_pkt_prepare = NULL;
2418 #ifdef RTE_IXGBE_INC_VECTOR
2419                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2420                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2421                                         ixgbe_txq_vec_setup(txq) == 0)) {
2422                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2423                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2424                 } else
2425 #endif
2426                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2427         } else {
2428                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2429                 PMD_INIT_LOG(DEBUG,
2430                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2431                                 (unsigned long)txq->txq_flags,
2432                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2433                 PMD_INIT_LOG(DEBUG,
2434                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2435                                 (unsigned long)txq->tx_rs_thresh,
2436                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2437                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2438                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2439         }
2440 }
2441
2442 int __attribute__((cold))
2443 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2444                          uint16_t queue_idx,
2445                          uint16_t nb_desc,
2446                          unsigned int socket_id,
2447                          const struct rte_eth_txconf *tx_conf)
2448 {
2449         const struct rte_memzone *tz;
2450         struct ixgbe_tx_queue *txq;
2451         struct ixgbe_hw     *hw;
2452         uint16_t tx_rs_thresh, tx_free_thresh;
2453
2454         PMD_INIT_FUNC_TRACE();
2455         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2456
2457         /*
2458          * Validate number of transmit descriptors.
2459          * It must not exceed hardware maximum, and must be multiple
2460          * of IXGBE_ALIGN.
2461          */
2462         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2463                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2464                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2465                 return -EINVAL;
2466         }
2467
2468         /*
2469          * The following two parameters control the setting of the RS bit on
2470          * transmit descriptors.
2471          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2472          * descriptors have been used.
2473          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2474          * descriptors are used or if the number of descriptors required
2475          * to transmit a packet is greater than the number of free TX
2476          * descriptors.
2477          * The following constraints must be satisfied:
2478          *  tx_rs_thresh must be greater than 0.
2479          *  tx_rs_thresh must be less than the size of the ring minus 2.
2480          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2481          *  tx_rs_thresh must be a divisor of the ring size.
2482          *  tx_free_thresh must be greater than 0.
2483          *  tx_free_thresh must be less than the size of the ring minus 3.
2484          * One descriptor in the TX ring is used as a sentinel to avoid a
2485          * H/W race condition, hence the maximum threshold constraints.
2486          * When set to zero use default values.
2487          */
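        /*
         * Worked example (illustration only): with nb_desc = 512,
         * tx_rs_thresh = 32, tx_free_thresh = 64 and wthresh = 0, and
         * assuming 32 does not exceed DEFAULT_TX_RS_THRESH, every check
         * below passes: 32 < 510, 32 <= 64, 64 < 509 and 512 % 32 == 0.
         */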
2488         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2489                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2490         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2491                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2492         if (tx_rs_thresh >= (nb_desc - 2)) {
2493                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2494                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2495                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2496                         (int)dev->data->port_id, (int)queue_idx);
2497                 return -(EINVAL);
2498         }
2499         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2500                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2501                         "(tx_rs_thresh=%u port=%d queue=%d)",
2502                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2503                         (int)dev->data->port_id, (int)queue_idx);
2504                 return -(EINVAL);
2505         }
2506         if (tx_free_thresh >= (nb_desc - 3)) {
2507                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2508                              "number of TX descriptors minus 3. "
2509                              "(tx_free_thresh=%u "
2510                              "port=%d queue=%d)",
2511                              (unsigned int)tx_free_thresh,
2512                              (int)dev->data->port_id, (int)queue_idx);
2513                 return -(EINVAL);
2514         }
2515         if (tx_rs_thresh > tx_free_thresh) {
2516                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2517                              "tx_free_thresh. (tx_free_thresh=%u "
2518                              "tx_rs_thresh=%u port=%d queue=%d)",
2519                              (unsigned int)tx_free_thresh,
2520                              (unsigned int)tx_rs_thresh,
2521                              (int)dev->data->port_id,
2522                              (int)queue_idx);
2523                 return -(EINVAL);
2524         }
2525         if ((nb_desc % tx_rs_thresh) != 0) {
2526                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2527                              "number of TX descriptors. (tx_rs_thresh=%u "
2528                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2529                              (int)dev->data->port_id, (int)queue_idx);
2530                 return -(EINVAL);
2531         }
2532
2533         /*
2534          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2535          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2536          * by the NIC and all descriptors are written back after the NIC
2537          * accumulates WTHRESH descriptors.
2538          */
2539         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2540                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2541                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2542                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2543                              (int)dev->data->port_id, (int)queue_idx);
2544                 return -(EINVAL);
2545         }
2546
2547         /* Free memory prior to re-allocation if needed... */
2548         if (dev->data->tx_queues[queue_idx] != NULL) {
2549                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2550                 dev->data->tx_queues[queue_idx] = NULL;
2551         }
2552
2553         /* First allocate the tx queue data structure */
2554         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2555                                  RTE_CACHE_LINE_SIZE, socket_id);
2556         if (txq == NULL)
2557                 return -ENOMEM;
2558
2559         /*
2560          * Allocate TX ring hardware descriptors. A memzone large enough to
2561          * handle the maximum ring size is allocated in order to allow for
2562          * resizing in later calls to the queue setup function.
2563          */
2564         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2565                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2566                         IXGBE_ALIGN, socket_id);
2567         if (tz == NULL) {
2568                 ixgbe_tx_queue_release(txq);
2569                 return -ENOMEM;
2570         }
2571
2572         txq->nb_tx_desc = nb_desc;
2573         txq->tx_rs_thresh = tx_rs_thresh;
2574         txq->tx_free_thresh = tx_free_thresh;
2575         txq->pthresh = tx_conf->tx_thresh.pthresh;
2576         txq->hthresh = tx_conf->tx_thresh.hthresh;
2577         txq->wthresh = tx_conf->tx_thresh.wthresh;
2578         txq->queue_id = queue_idx;
2579         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2580                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2581         txq->port_id = dev->data->port_id;
2582         txq->txq_flags = tx_conf->txq_flags;
2583         txq->ops = &def_txq_ops;
2584         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2585 #ifdef RTE_LIBRTE_SECURITY
2586         txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2587                         DEV_TX_OFFLOAD_SECURITY);
2588 #endif
2589
2590         /*
2591          * Modification to set VFTDT for virtual function if vf is detected
2592          */
2593         if (hw->mac.type == ixgbe_mac_82599_vf ||
2594             hw->mac.type == ixgbe_mac_X540_vf ||
2595             hw->mac.type == ixgbe_mac_X550_vf ||
2596             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2597             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2598                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2599         else
2600                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2601
2602         txq->tx_ring_phys_addr = tz->iova;
2603         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2604
2605         /* Allocate software ring */
2606         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2607                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2608                                 RTE_CACHE_LINE_SIZE, socket_id);
2609         if (txq->sw_ring == NULL) {
2610                 ixgbe_tx_queue_release(txq);
2611                 return -ENOMEM;
2612         }
2613         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2614                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2615
2616         /* set up vector or scalar TX function as appropriate */
2617         ixgbe_set_tx_function(dev, txq);
2618
2619         txq->ops->reset(txq);
2620
2621         dev->data->tx_queues[queue_idx] = txq;
2622
2623
2624         return 0;
2625 }
2626
2627 /**
2628  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2629  *
2630  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2631  * in the sw_rsc_ring is not set to NULL but rather points to the next
2632  * mbuf of this RSC aggregation (that has not been completed yet and still
2633  * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2634  * will just free the first "nb_segs" segments of the cluster explicitly by
2635  * calling rte_pktmbuf_free_seg().
2636  *
2637  * @m scattered cluster head
2638  */
2639 static void __attribute__((cold))
2640 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2641 {
2642         uint16_t i, nb_segs = m->nb_segs;
2643         struct rte_mbuf *next_seg;
2644
2645         for (i = 0; i < nb_segs; i++) {
2646                 next_seg = m->next;
2647                 rte_pktmbuf_free_seg(m);
2648                 m = next_seg;
2649         }
2650 }
2651
2652 static void __attribute__((cold))
2653 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2654 {
2655         unsigned i;
2656
2657 #ifdef RTE_IXGBE_INC_VECTOR
2658         /* SSE Vector driver has a different way of releasing mbufs. */
2659         if (rxq->rx_using_sse) {
2660                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2661                 return;
2662         }
2663 #endif
2664
2665         if (rxq->sw_ring != NULL) {
2666                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2667                         if (rxq->sw_ring[i].mbuf != NULL) {
2668                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2669                                 rxq->sw_ring[i].mbuf = NULL;
2670                         }
2671                 }
2672                 if (rxq->rx_nb_avail) {
2673                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2674                                 struct rte_mbuf *mb;
2675
2676                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2677                                 rte_pktmbuf_free_seg(mb);
2678                         }
2679                         rxq->rx_nb_avail = 0;
2680                 }
2681         }
2682
2683         if (rxq->sw_sc_ring)
2684                 for (i = 0; i < rxq->nb_rx_desc; i++)
2685                         if (rxq->sw_sc_ring[i].fbuf) {
2686                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2687                                 rxq->sw_sc_ring[i].fbuf = NULL;
2688                         }
2689 }
2690
2691 static void __attribute__((cold))
2692 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2693 {
2694         if (rxq != NULL) {
2695                 ixgbe_rx_queue_release_mbufs(rxq);
2696                 rte_free(rxq->sw_ring);
2697                 rte_free(rxq->sw_sc_ring);
2698                 rte_free(rxq);
2699         }
2700 }
2701
2702 void __attribute__((cold))
2703 ixgbe_dev_rx_queue_release(void *rxq)
2704 {
2705         ixgbe_rx_queue_release(rxq);
2706 }
2707
2708 /*
2709  * Check if Rx Burst Bulk Alloc function can be used.
2710  * Return
2711  *        0: the preconditions are satisfied and the bulk allocation function
2712  *           can be used.
2713  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2714  *           function must be used.
2715  */
2716 static inline int __attribute__((cold))
2717 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2718 {
2719         int ret = 0;
2720
2721         /*
2722          * Make sure the following pre-conditions are satisfied:
2723          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2724          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2725          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2726          * Scattered packets are not supported.  This should be checked
2727          * outside of this function.
2728          */
2729         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2730                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2731                              "rxq->rx_free_thresh=%d, "
2732                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2733                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2734                 ret = -EINVAL;
2735         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2736                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2737                              "rxq->rx_free_thresh=%d, "
2738                              "rxq->nb_rx_desc=%d",
2739                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2740                 ret = -EINVAL;
2741         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2742                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2743                              "rxq->nb_rx_desc=%d, "
2744                              "rxq->rx_free_thresh=%d",
2745                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2746                 ret = -EINVAL;
2747         }
2748
2749         return ret;
2750 }
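/*
 * Illustrative numbers (not taken from the code above): a queue created with
 * nb_rx_desc = 128 and rx_free_thresh = 32 passes all three checks, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST is no larger than 32: 32 >= 32, 32 < 128 and
 * 128 % 32 == 0, so the bulk allocation Rx burst path stays available.
 */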
2751
2752 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2753 static void __attribute__((cold))
2754 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2755 {
2756         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2757         unsigned i;
2758         uint16_t len = rxq->nb_rx_desc;
2759
2760         /*
2761          * By default, the Rx queue setup function allocates enough memory for
2762          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2763          * extra memory at the end of the descriptor ring to be zeroed out.
2764          */
2765         if (adapter->rx_bulk_alloc_allowed)
2766                 /* zero out extra memory */
2767                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2768
2769         /*
2770          * Zero out HW ring memory. Zero out extra memory at the end of
2771          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2772          * reads extra memory as zeros.
2773          */
2774         for (i = 0; i < len; i++) {
2775                 rxq->rx_ring[i] = zeroed_desc;
2776         }
2777
2778         /*
2779          * initialize extra software ring entries. Space for these extra
2780          * entries is always allocated
2781          */
2782         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2783         for (i = rxq->nb_rx_desc; i < len; ++i) {
2784                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2785         }
2786
2787         rxq->rx_nb_avail = 0;
2788         rxq->rx_next_avail = 0;
2789         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2790         rxq->rx_tail = 0;
2791         rxq->nb_rx_hold = 0;
2792         rxq->pkt_first_seg = NULL;
2793         rxq->pkt_last_seg = NULL;
2794
2795 #ifdef RTE_IXGBE_INC_VECTOR
2796         rxq->rxrearm_start = 0;
2797         rxq->rxrearm_nb = 0;
2798 #endif
2799 }
2800
2801 int __attribute__((cold))
2802 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2803                          uint16_t queue_idx,
2804                          uint16_t nb_desc,
2805                          unsigned int socket_id,
2806                          const struct rte_eth_rxconf *rx_conf,
2807                          struct rte_mempool *mp)
2808 {
2809         const struct rte_memzone *rz;
2810         struct ixgbe_rx_queue *rxq;
2811         struct ixgbe_hw     *hw;
2812         uint16_t len;
2813         struct ixgbe_adapter *adapter =
2814                 (struct ixgbe_adapter *)dev->data->dev_private;
2815
2816         PMD_INIT_FUNC_TRACE();
2817         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2818
2819         /*
2820          * Validate number of receive descriptors.
2821          * It must not exceed hardware maximum, and must be multiple
2822          * of IXGBE_ALIGN.
2823          */
2824         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2825                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2826                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2827                 return -EINVAL;
2828         }
2829
2830         /* Free memory prior to re-allocation if needed... */
2831         if (dev->data->rx_queues[queue_idx] != NULL) {
2832                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2833                 dev->data->rx_queues[queue_idx] = NULL;
2834         }
2835
2836         /* First allocate the rx queue data structure */
2837         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2838                                  RTE_CACHE_LINE_SIZE, socket_id);
2839         if (rxq == NULL)
2840                 return -ENOMEM;
2841         rxq->mb_pool = mp;
2842         rxq->nb_rx_desc = nb_desc;
2843         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2844         rxq->queue_id = queue_idx;
2845         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2846                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2847         rxq->port_id = dev->data->port_id;
2848         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2849                                                         0 : ETHER_CRC_LEN);
2850         rxq->drop_en = rx_conf->rx_drop_en;
2851         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2852
2853         /*
2854          * The packet type in RX descriptor is different for different NICs.
2855          * Some bits are used for x550 but reserved for other NICs.
2856          * So set different masks for different NICs.
2857          */
2858         if (hw->mac.type == ixgbe_mac_X550 ||
2859             hw->mac.type == ixgbe_mac_X550EM_x ||
2860             hw->mac.type == ixgbe_mac_X550EM_a ||
2861             hw->mac.type == ixgbe_mac_X550_vf ||
2862             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2863             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2864                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2865         else
2866                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2867
2868         /*
2869          * Allocate RX ring hardware descriptors. A memzone large enough to
2870          * handle the maximum ring size is allocated in order to allow for
2871          * resizing in later calls to the queue setup function.
2872          */
2873         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2874                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2875         if (rz == NULL) {
2876                 ixgbe_rx_queue_release(rxq);
2877                 return -ENOMEM;
2878         }
2879
2880         /*
2881          * Zero init all the descriptors in the ring.
2882          */
2883         memset(rz->addr, 0, RX_RING_SZ);
2884
2885         /*
2886          * Modified to setup VFRDT for Virtual Function
2887          */
2888         if (hw->mac.type == ixgbe_mac_82599_vf ||
2889             hw->mac.type == ixgbe_mac_X540_vf ||
2890             hw->mac.type == ixgbe_mac_X550_vf ||
2891             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2892             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2893                 rxq->rdt_reg_addr =
2894                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2895                 rxq->rdh_reg_addr =
2896                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2897         } else {
2898                 rxq->rdt_reg_addr =
2899                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2900                 rxq->rdh_reg_addr =
2901                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2902         }
2903
2904         rxq->rx_ring_phys_addr = rz->iova;
2905         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2906
2907         /*
2908          * Certain constraints must be met in order to use the bulk buffer
2909          * allocation Rx burst function. If any of the Rx queues doesn't meet them
2910          * the feature should be disabled for the whole port.
2911          */
2912         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2913                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2914                                     "preconditions - canceling the feature for "
2915                                     "the whole port[%d]",
2916                              rxq->queue_id, rxq->port_id);
2917                 adapter->rx_bulk_alloc_allowed = false;
2918         }
2919
2920         /*
2921          * Allocate software ring. Allow for space at the end of the
2922          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2923          * function does not access an invalid memory region.
2924          */
2925         len = nb_desc;
2926         if (adapter->rx_bulk_alloc_allowed)
2927                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2928
2929         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2930                                           sizeof(struct ixgbe_rx_entry) * len,
2931                                           RTE_CACHE_LINE_SIZE, socket_id);
2932         if (!rxq->sw_ring) {
2933                 ixgbe_rx_queue_release(rxq);
2934                 return -ENOMEM;
2935         }
2936
2937         /*
2938          * Always allocate even if it's not going to be needed in order to
2939          * simplify the code.
2940          *
2941          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2942          * be requested in ixgbe_dev_rx_init(), which is called later from
2943          * dev_start() flow.
2944          */
2945         rxq->sw_sc_ring =
2946                 rte_zmalloc_socket("rxq->sw_sc_ring",
2947                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2948                                    RTE_CACHE_LINE_SIZE, socket_id);
2949         if (!rxq->sw_sc_ring) {
2950                 ixgbe_rx_queue_release(rxq);
2951                 return -ENOMEM;
2952         }
2953
2954         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2955                             "dma_addr=0x%"PRIx64,
2956                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2957                      rxq->rx_ring_phys_addr);
2958
2959         if (!rte_is_power_of_2(nb_desc)) {
2960                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2961                                     "preconditions - canceling the feature for "
2962                                     "the whole port[%d]",
2963                              rxq->queue_id, rxq->port_id);
2964                 adapter->rx_vec_allowed = false;
2965         } else
2966                 ixgbe_rxq_vec_setup(rxq);
2967
2968         dev->data->rx_queues[queue_idx] = rxq;
2969
2970         ixgbe_reset_rx_queue(adapter, rxq);
2971
2972         return 0;
2973 }
2974
2975 uint32_t
2976 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2977 {
2978 #define IXGBE_RXQ_SCAN_INTERVAL 4
2979         volatile union ixgbe_adv_rx_desc *rxdp;
2980         struct ixgbe_rx_queue *rxq;
2981         uint32_t desc = 0;
2982
2983         rxq = dev->data->rx_queues[rx_queue_id];
2984         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2985
2986         while ((desc < rxq->nb_rx_desc) &&
2987                 (rxdp->wb.upper.status_error &
2988                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2989                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2990                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2991                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2992                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2993                                 desc - rxq->nb_rx_desc]);
2994         }
2995
2996         return desc;
2997 }
2998
2999 int
3000 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3001 {
3002         volatile union ixgbe_adv_rx_desc *rxdp;
3003         struct ixgbe_rx_queue *rxq = rx_queue;
3004         uint32_t desc;
3005
3006         if (unlikely(offset >= rxq->nb_rx_desc))
3007                 return 0;
3008         desc = rxq->rx_tail + offset;
3009         if (desc >= rxq->nb_rx_desc)
3010                 desc -= rxq->nb_rx_desc;
3011
3012         rxdp = &rxq->rx_ring[desc];
3013         return !!(rxdp->wb.upper.status_error &
3014                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3015 }
3016
3017 int
3018 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3019 {
3020         struct ixgbe_rx_queue *rxq = rx_queue;
3021         volatile uint32_t *status;
3022         uint32_t nb_hold, desc;
3023
3024         if (unlikely(offset >= rxq->nb_rx_desc))
3025                 return -EINVAL;
3026
3027 #ifdef RTE_IXGBE_INC_VECTOR
3028         if (rxq->rx_using_sse)
3029                 nb_hold = rxq->rxrearm_nb;
3030         else
3031 #endif
3032                 nb_hold = rxq->nb_rx_hold;
3033         if (offset >= rxq->nb_rx_desc - nb_hold)
3034                 return RTE_ETH_RX_DESC_UNAVAIL;
3035
3036         desc = rxq->rx_tail + offset;
3037         if (desc >= rxq->nb_rx_desc)
3038                 desc -= rxq->nb_rx_desc;
3039
3040         status = &rxq->rx_ring[desc].wb.upper.status_error;
3041         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3042                 return RTE_ETH_RX_DESC_DONE;
3043
3044         return RTE_ETH_RX_DESC_AVAIL;
3045 }
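/*
 * Example of the arithmetic above (illustration only): with nb_rx_desc = 128,
 * rx_tail = 120, nb_hold = 8 and offset = 10, the offset is still serviceable
 * (10 < 128 - 8) and the ring index wraps to desc = 120 + 10 - 128 = 2 before
 * the DD bit of that descriptor is tested.
 */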
3046
3047 int
3048 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3049 {
3050         struct ixgbe_tx_queue *txq = tx_queue;
3051         volatile uint32_t *status;
3052         uint32_t desc;
3053
3054         if (unlikely(offset >= txq->nb_tx_desc))
3055                 return -EINVAL;
3056
3057         desc = txq->tx_tail + offset;
3058         /* go to next desc that has the RS bit */
3059         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3060                 txq->tx_rs_thresh;
3061         if (desc >= txq->nb_tx_desc) {
3062                 desc -= txq->nb_tx_desc;
3063                 if (desc >= txq->nb_tx_desc)
3064                         desc -= txq->nb_tx_desc;
3065         }
3066
3067         status = &txq->tx_ring[desc].wb.status;
3068         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3069                 return RTE_ETH_TX_DESC_DONE;
3070
3071         return RTE_ETH_TX_DESC_FULL;
3072 }
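/*
 * Example of the RS rounding above (illustration only): with tx_tail = 100,
 * offset = 5 and tx_rs_thresh = 32, desc = 105 is rounded up to the next
 * descriptor that carries the RS bit, ((105 + 31) / 32) * 32 = 128, and the
 * DD bit of that descriptor (modulo the ring size) decides DONE vs. FULL.
 */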
3073
3074 void __attribute__((cold))
3075 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3076 {
3077         unsigned i;
3078         struct ixgbe_adapter *adapter =
3079                 (struct ixgbe_adapter *)dev->data->dev_private;
3080
3081         PMD_INIT_FUNC_TRACE();
3082
3083         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3084                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3085
3086                 if (txq != NULL) {
3087                         txq->ops->release_mbufs(txq);
3088                         txq->ops->reset(txq);
3089                 }
3090         }
3091
3092         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3093                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3094
3095                 if (rxq != NULL) {
3096                         ixgbe_rx_queue_release_mbufs(rxq);
3097                         ixgbe_reset_rx_queue(adapter, rxq);
3098                 }
3099         }
3100 }
3101
3102 void
3103 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3104 {
3105         unsigned i;
3106
3107         PMD_INIT_FUNC_TRACE();
3108
3109         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3110                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3111                 dev->data->rx_queues[i] = NULL;
3112         }
3113         dev->data->nb_rx_queues = 0;
3114
3115         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3116                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3117                 dev->data->tx_queues[i] = NULL;
3118         }
3119         dev->data->nb_tx_queues = 0;
3120 }
3121
3122 /*********************************************************************
3123  *
3124  *  Device RX/TX init functions
3125  *
3126  **********************************************************************/
3127
3128 /**
3129  * Receive Side Scaling (RSS)
3130  * See section 7.1.2.8 in the following document:
3131  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3132  *
3133  * Principles:
3134  * The source and destination IP addresses of the IP header and the source
3135  * and destination ports of TCP/UDP headers, if any, of received packets are
3136  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3137  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3138  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3139  * RSS output index which is used as the RX queue index where to store the
3140  * received packets.
3141  * The following output is supplied in the RX write-back descriptor:
3142  *     - 32-bit result of the Microsoft RSS hash function,
3143  *     - 4-bit RSS type field.
3144  */
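/*
 * Pseudo-code sketch of the dispatch described above (illustrative only; the
 * lookup is performed by the hardware, and rss_hash()/deliver() are
 * hypothetical helpers, not driver functions):
 *
 *     uint32_t hash = rss_hash(pkt, rss_key);
 *     uint8_t queue = reta[hash & 0x7F];     7 LSBs index the 128 entries
 *     deliver(pkt, queue);
 */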
3145
3146 /*
3147  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3148  * Used as the default key.
3149  */
3150 static uint8_t rss_intel_key[40] = {
3151         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3152         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3153         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3154         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3155         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3156 };
3157
3158 static void
3159 ixgbe_rss_disable(struct rte_eth_dev *dev)
3160 {
3161         struct ixgbe_hw *hw;
3162         uint32_t mrqc;
3163         uint32_t mrqc_reg;
3164
3165         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3166         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3167         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3168         mrqc &= ~IXGBE_MRQC_RSSEN;
3169         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3170 }
3171
3172 static void
3173 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3174 {
3175         uint8_t  *hash_key;
3176         uint32_t mrqc;
3177         uint32_t rss_key;
3178         uint64_t rss_hf;
3179         uint16_t i;
3180         uint32_t mrqc_reg;
3181         uint32_t rssrk_reg;
3182
3183         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3184         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3185
3186         hash_key = rss_conf->rss_key;
3187         if (hash_key != NULL) {
3188                 /* Fill in RSS hash key */
3189                 for (i = 0; i < 10; i++) {
3190                         rss_key  = hash_key[(i * 4)];
3191                         rss_key |= hash_key[(i * 4) + 1] << 8;
3192                         rss_key |= hash_key[(i * 4) + 2] << 16;
3193                         rss_key |= hash_key[(i * 4) + 3] << 24;
3194                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3195                 }
3196         }
3197
3198         /* Set configured hashing protocols in MRQC register */
3199         rss_hf = rss_conf->rss_hf;
3200         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3201         if (rss_hf & ETH_RSS_IPV4)
3202                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3203         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3204                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3205         if (rss_hf & ETH_RSS_IPV6)
3206                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3207         if (rss_hf & ETH_RSS_IPV6_EX)
3208                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3209         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3210                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3211         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3212                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3213         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3214                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3215         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3216                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3217         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3218                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3219         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3220 }
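/*
 * Key packing example (illustration only): the loop above writes the 40-byte
 * key into ten 32-bit RSSRK registers least-significant byte first, so the
 * leading key bytes 0x6D, 0x5A, 0x56, 0xDA of rss_intel_key above would be
 * stored in the first register as 0xDA565A6D.
 */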
3221
3222 int
3223 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3224                           struct rte_eth_rss_conf *rss_conf)
3225 {
3226         struct ixgbe_hw *hw;
3227         uint32_t mrqc;
3228         uint64_t rss_hf;
3229         uint32_t mrqc_reg;
3230
3231         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3232
3233         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3234                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3235                         "NIC.");
3236                 return -ENOTSUP;
3237         }
3238         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3239
3240         /*
3241          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3242          *     "RSS enabling cannot be done dynamically while it must be
3243          *      preceded by a software reset"
3244          * Before changing anything, first check that the update RSS operation
3245          * does not attempt to disable RSS, if RSS was enabled at
3246          * initialization time, or does not attempt to enable RSS, if RSS was
3247          * disabled at initialization time.
3248          */
3249         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3250         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3251         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3252                 if (rss_hf != 0) /* Enable RSS */
3253                         return -(EINVAL);
3254                 return 0; /* Nothing to do */
3255         }
3256         /* RSS enabled */
3257         if (rss_hf == 0) /* Disable RSS */
3258                 return -(EINVAL);
3259         ixgbe_hw_rss_hash_set(hw, rss_conf);
3260         return 0;
3261 }
3262
3263 int
3264 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3265                             struct rte_eth_rss_conf *rss_conf)
3266 {
3267         struct ixgbe_hw *hw;
3268         uint8_t *hash_key;
3269         uint32_t mrqc;
3270         uint32_t rss_key;
3271         uint64_t rss_hf;
3272         uint16_t i;
3273         uint32_t mrqc_reg;
3274         uint32_t rssrk_reg;
3275
3276         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3277         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3278         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3279         hash_key = rss_conf->rss_key;
3280         if (hash_key != NULL) {
3281                 /* Return RSS hash key */
3282                 for (i = 0; i < 10; i++) {
3283                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3284                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3285                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3286                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3287                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3288                 }
3289         }
3290
3291         /* Get RSS functions configured in MRQC register */
3292         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3293         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3294                 rss_conf->rss_hf = 0;
3295                 return 0;
3296         }
3297         rss_hf = 0;
3298         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3299                 rss_hf |= ETH_RSS_IPV4;
3300         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3301                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3302         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3303                 rss_hf |= ETH_RSS_IPV6;
3304         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3305                 rss_hf |= ETH_RSS_IPV6_EX;
3306         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3307                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3308         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3309                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3310         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3311                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3312         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3313                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3314         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3315                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3316         rss_conf->rss_hf = rss_hf;
3317         return 0;
3318 }
3319
3320 static void
3321 ixgbe_rss_configure(struct rte_eth_dev *dev)
3322 {
3323         struct rte_eth_rss_conf rss_conf;
3324         struct ixgbe_hw *hw;
3325         uint32_t reta;
3326         uint16_t i;
3327         uint16_t j;
3328         uint16_t sp_reta_size;
3329         uint32_t reta_reg;
3330
3331         PMD_INIT_FUNC_TRACE();
3332         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3333
3334         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3335
3336         /*
3337          * Fill in redirection table
3338          * The byte-swap is needed because NIC registers are in
3339          * little-endian order.
3340          */
3341         reta = 0;
3342         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3343                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3344
3345                 if (j == dev->data->nb_rx_queues)
3346                         j = 0;
3347                 reta = (reta << 8) | j;
3348                 if ((i & 3) == 3)
3349                         IXGBE_WRITE_REG(hw, reta_reg,
3350                                         rte_bswap32(reta));
3351         }
3352
3353         /*
3354          * Configure the RSS key and the RSS protocols used to compute
3355          * the RSS hash of input packets.
3356          */
3357         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3358         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3359                 ixgbe_rss_disable(dev);
3360                 return;
3361         }
3362         if (rss_conf.rss_key == NULL)
3363                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3364         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3365 }
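/*
 * RETA fill example (illustration only): with 4 Rx queues the loop above
 * cycles j through 0,1,2,3,0,1,... and flushes four packed entries to one
 * RETA register on every fourth iteration; after the byte swap the first
 * register carries the queue indices 0..3 in the little-endian order the
 * hardware expects.
 */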
3366
3367 #define NUM_VFTA_REGISTERS 128
3368 #define NIC_RX_BUFFER_SIZE 0x200
3369 #define X550_RX_BUFFER_SIZE 0x180
3370
3371 static void
3372 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3373 {
3374         struct rte_eth_vmdq_dcb_conf *cfg;
3375         struct ixgbe_hw *hw;
3376         enum rte_eth_nb_pools num_pools;
3377         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3378         uint16_t pbsize;
3379         uint8_t nb_tcs; /* number of traffic classes */
3380         int i;
3381
3382         PMD_INIT_FUNC_TRACE();
3383         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3384         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3385         num_pools = cfg->nb_queue_pools;
3386         /* Check we have a valid number of pools */
3387         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3388                 ixgbe_rss_disable(dev);
3389                 return;
3390         }
3391         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3392         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3393
3394         /*
3395          * RXPBSIZE
3396          * split rx buffer up into sections, each for 1 traffic class
3397          */
3398         switch (hw->mac.type) {
3399         case ixgbe_mac_X550:
3400         case ixgbe_mac_X550EM_x:
3401         case ixgbe_mac_X550EM_a:
3402                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3403                 break;
3404         default:
3405                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3406                 break;
3407         }
3408         for (i = 0; i < nb_tcs; i++) {
3409                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3410
3411                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3412                 /* clear 10 bits. */
3413                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3414                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3415         }
3416         /* zero alloc all unused TCs */
3417         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3418                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3419
3420                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3421                 /* clear 10 bits. */
3422                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3423         }
3424
3425         /* MRQC: enable vmdq and dcb */
3426         mrqc = (num_pools == ETH_16_POOLS) ?
3427                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3428         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3429
3430         /* PFVTCTL: turn on virtualisation and set the default pool */
3431         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3432         if (cfg->enable_default_pool) {
3433                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3434         } else {
3435                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3436         }
3437
3438         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3439
3440         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3441         queue_mapping = 0;
3442         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3443                 /*
3444                  * mapping is done with 3 bits per priority,
3445                  * so shift by i*3 each time
3446                  */
3447                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3448
3449         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3450
3451         /* RTRPCS: DCB related */
3452         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3453
3454         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3455         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3456         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3457         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3458
3459         /* VFTA - enable all vlan filters */
3460         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3461                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3462         }
3463
3464         /* VFRE: pool enabling for receive - 16 or 32 */
3465         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3466                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3467
3468         /*
3469          * MPSAR - allow pools to read specific mac addresses
3470          * In this case, all pools should be able to read from mac addr 0
3471          */
3472         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3473         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3474
3475         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3476         for (i = 0; i < cfg->nb_pool_maps; i++) {
3477                 /* set vlan id in VF register and set the valid bit */
3478                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3479                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3480                 /*
3481                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3482                  * pools, we only need to use the first half of the register
3483                  * i.e. bits 0-31
3484                  */
3485                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3486         }
3487 }
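/*
 * Sizing example (illustration only): with cfg->nb_queue_pools = ETH_16_POOLS
 * the function above derives nb_tcs = 128 / 16 = 8 traffic classes and, on a
 * non-X550 MAC, splits the 0x200 Rx packet buffer into eight 0x40 sections,
 * one per traffic class.
 */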
3488
3489 /**
3490  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3491  * @dev: pointer to eth_dev structure
3492  * @dcb_config: pointer to ixgbe_dcb_config structure
3493  */
3494 static void
3495 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3496                        struct ixgbe_dcb_config *dcb_config)
3497 {
3498         uint32_t reg;
3499         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3500
3501         PMD_INIT_FUNC_TRACE();
3502         if (hw->mac.type != ixgbe_mac_82598EB) {
3503                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3504                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3505                 reg |= IXGBE_RTTDCS_ARBDIS;
3506                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3507
3508                 /* Enable DCB for Tx with 8 TCs */
3509                 if (dcb_config->num_tcs.pg_tcs == 8) {
3510                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3511                 } else {
3512                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3513                 }
3514                 if (dcb_config->vt_mode)
3515                         reg |= IXGBE_MTQC_VT_ENA;
3516                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3517
3518                 /* Enable the Tx desc arbiter */
3519                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3520                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3521                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3522
3523                 /* Enable Security TX Buffer IFG for DCB */
3524                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3525                 reg |= IXGBE_SECTX_DCB;
3526                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3527         }
3528 }
3529
3530 /**
3531  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3532  * @dev: pointer to rte_eth_dev structure
3533  * @dcb_config: pointer to ixgbe_dcb_config structure
3534  */
3535 static void
3536 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3537                         struct ixgbe_dcb_config *dcb_config)
3538 {
3539         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3540                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3541         struct ixgbe_hw *hw =
3542                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3543
3544         PMD_INIT_FUNC_TRACE();
3545         if (hw->mac.type != ixgbe_mac_82598EB)
3546                 /*PF VF Transmit Enable*/
3547                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3548                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3549
3550         /*Configure general DCB TX parameters*/
3551         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3552 }
3553
3554 static void
3555 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3556                         struct ixgbe_dcb_config *dcb_config)
3557 {
3558         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3559                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3560         struct ixgbe_dcb_tc_config *tc;
3561         uint8_t i, j;
3562
3563         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3564         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3565                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3566                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3567         } else {
3568                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3569                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3570         }
3571
3572         /* Initialize User Priority to Traffic Class mapping */
3573         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3574                 tc = &dcb_config->tc_config[j];
3575                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3576         }
3577
3578         /* User Priority to Traffic Class mapping */
3579         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3580                 j = vmdq_rx_conf->dcb_tc[i];
3581                 tc = &dcb_config->tc_config[j];
3582                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3583                                                 (uint8_t)(1 << i);
3584         }
3585 }
3586
3587 static void
3588 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3589                         struct ixgbe_dcb_config *dcb_config)
3590 {
3591         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3592                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3593         struct ixgbe_dcb_tc_config *tc;
3594         uint8_t i, j;
3595
3596         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3597         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3598                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3599                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3600         } else {
3601                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3602                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3603         }
3604
3605         /* Initialize User Priority to Traffic Class mapping */
3606         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3607                 tc = &dcb_config->tc_config[j];
3608                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3609         }
3610
3611         /* User Priority to Traffic Class mapping */
3612         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3613                 j = vmdq_tx_conf->dcb_tc[i];
3614                 tc = &dcb_config->tc_config[j];
3615                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3616                                                 (uint8_t)(1 << i);
3617         }
3618 }
3619
3620 static void
3621 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3622                 struct ixgbe_dcb_config *dcb_config)
3623 {
3624         struct rte_eth_dcb_rx_conf *rx_conf =
3625                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3626         struct ixgbe_dcb_tc_config *tc;
3627         uint8_t i, j;
3628
3629         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3630         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3631
3632         /* Initialize User Priority to Traffic Class mapping */
3633         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3634                 tc = &dcb_config->tc_config[j];
3635                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3636         }
3637
3638         /* User Priority to Traffic Class mapping */
3639         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3640                 j = rx_conf->dcb_tc[i];
3641                 tc = &dcb_config->tc_config[j];
3642                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3643                                                 (uint8_t)(1 << i);
3644         }
3645 }
3646
3647 static void
3648 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3649                 struct ixgbe_dcb_config *dcb_config)
3650 {
3651         struct rte_eth_dcb_tx_conf *tx_conf =
3652                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3653         struct ixgbe_dcb_tc_config *tc;
3654         uint8_t i, j;
3655
3656         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3657         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3658
3659         /* Initialize User Priority to Traffic Class mapping */
3660         for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3661                 tc = &dcb_config->tc_config[j];
3662                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3663         }
3664
3665         /* User Priority to Traffic Class mapping */
3666         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3667                 j = tx_conf->dcb_tc[i];
3668                 tc = &dcb_config->tc_config[j];
3669                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3670                                                 (uint8_t)(1 << i);
3671         }
3672 }
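/*
 * Mapping example (illustration only): an application-supplied dcb_tc[] of
 * {0, 0, 1, 1, 2, 2, 3, 3} makes the loops above produce
 * up_to_tc_bitmap = 0x03 for TC0, 0x0C for TC1, 0x30 for TC2 and 0xC0 for
 * TC3, i.e. two user priorities feed each traffic class.
 */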
3673
3674 /**
3675  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3676  * @dev: pointer to eth_dev structure
3677  * @dcb_config: pointer to ixgbe_dcb_config structure
3678  */
3679 static void
3680 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3681                        struct ixgbe_dcb_config *dcb_config)
3682 {
3683         uint32_t reg;
3684         uint32_t vlanctrl;
3685         uint8_t i;
3686         uint32_t q;
3687         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3688
3689         PMD_INIT_FUNC_TRACE();
3690         /*
3691          * Disable the arbiter before changing parameters
3692          * (always enable recycle mode; WSP)
3693          */
3694         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3695         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3696
3697         if (hw->mac.type != ixgbe_mac_82598EB) {
3698                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3699                 if (dcb_config->num_tcs.pg_tcs == 4) {
3700                         if (dcb_config->vt_mode)
3701                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3702                                         IXGBE_MRQC_VMDQRT4TCEN;
3703                         else {
3704                                 /* no matter whether the mode is DCB or DCB_RSS,
3705                                  * just set the MRQE to RSSXTCEN. RSS is controlled
3706                                  * by RSS_FIELD
3707                                  */
3708                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3709                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3710                                         IXGBE_MRQC_RTRSS4TCEN;
3711                         }
3712                 }
3713                 if (dcb_config->num_tcs.pg_tcs == 8) {
3714                         if (dcb_config->vt_mode)
3715                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3716                                         IXGBE_MRQC_VMDQRT8TCEN;
3717                         else {
3718                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3719                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3720                                         IXGBE_MRQC_RTRSS8TCEN;
3721                         }
3722                 }
3723
3724                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3725
3726                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3727                         /* Disable drop for all queues in VMDQ mode*/
3728                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3729                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3730                                                 (IXGBE_QDE_WRITE |
3731                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3732                 } else {
3733                         /* Enable drop for all queues in SRIOV mode */
3734                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3735                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3736                                                 (IXGBE_QDE_WRITE |
3737                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3738                                                  IXGBE_QDE_ENABLE));
3739                 }
3740         }
3741
3742         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3743         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3744         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3745         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3746
3747         /* VFTA - enable all vlan filters */
3748         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3749                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3750         }
3751
3752         /*
3753          * Configure Rx packet plane (recycle mode; WSP) and
3754          * enable arbiter
3755          */
3756         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3757         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3758 }
3759
3760 static void
3761 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3762                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3763 {
3764         switch (hw->mac.type) {
3765         case ixgbe_mac_82598EB:
3766                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3767                 break;
3768         case ixgbe_mac_82599EB:
3769         case ixgbe_mac_X540:
3770         case ixgbe_mac_X550:
3771         case ixgbe_mac_X550EM_x:
3772         case ixgbe_mac_X550EM_a:
3773                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3774                                                   tsa, map);
3775                 break;
3776         default:
3777                 break;
3778         }
3779 }
3780
3781 static void
3782 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3783                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3784 {
3785         switch (hw->mac.type) {
3786         case ixgbe_mac_82598EB:
3787                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3788                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3789                 break;
3790         case ixgbe_mac_82599EB:
3791         case ixgbe_mac_X540:
3792         case ixgbe_mac_X550:
3793         case ixgbe_mac_X550EM_x:
3794         case ixgbe_mac_X550EM_a:
3795                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3796                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3797                 break;
3798         default:
3799                 break;
3800         }
3801 }
3802
3803 #define DCB_RX_CONFIG  1
3804 #define DCB_TX_CONFIG  1
3805 #define DCB_TX_PB      1024
3806 /**
3807  * ixgbe_dcb_hw_configure - Enable DCB and configure
3808  * general DCB parameters in VT mode and non-VT mode
3809  * @dev: pointer to rte_eth_dev structure
3810  * @dcb_config: pointer to ixgbe_dcb_config structure
3811  */
3812 static int
3813 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3814                         struct ixgbe_dcb_config *dcb_config)
3815 {
3816         int     ret = 0;
3817         uint8_t i, pfc_en, nb_tcs;
3818         uint16_t pbsize, rx_buffer_size;
3819         uint8_t config_dcb_rx = 0;
3820         uint8_t config_dcb_tx = 0;
3821         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3822         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3823         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3824         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3825         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3826         struct ixgbe_dcb_tc_config *tc;
3827         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3828         struct ixgbe_hw *hw =
3829                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3830         struct ixgbe_bw_conf *bw_conf =
3831                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3832
3833         switch (dev->data->dev_conf.rxmode.mq_mode) {
3834         case ETH_MQ_RX_VMDQ_DCB:
3835                 dcb_config->vt_mode = true;
3836                 if (hw->mac.type != ixgbe_mac_82598EB) {
3837                         config_dcb_rx = DCB_RX_CONFIG;
3838                         /*
3839                          * Get DCB and VT RX configuration parameters
3840                          * from rte_eth_conf
3841                          */
3842                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3843                         /* Configure general VMDQ and DCB RX parameters */
3844                         ixgbe_vmdq_dcb_configure(dev);
3845                 }
3846                 break;
3847         case ETH_MQ_RX_DCB:
3848         case ETH_MQ_RX_DCB_RSS:
3849                 dcb_config->vt_mode = false;
3850                 config_dcb_rx = DCB_RX_CONFIG;
3851                 /* Get DCB RX configuration parameters from rte_eth_conf */
3852                 ixgbe_dcb_rx_config(dev, dcb_config);
3853                 /* Configure general DCB RX parameters */
3854                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3855                 break;
3856         default:
3857                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3858                 break;
3859         }
3860         switch (dev->data->dev_conf.txmode.mq_mode) {
3861         case ETH_MQ_TX_VMDQ_DCB:
3862                 dcb_config->vt_mode = true;
3863                 config_dcb_tx = DCB_TX_CONFIG;
3864                 /* get DCB and VT TX configuration parameters
3865                  * from rte_eth_conf
3866                  */
3867                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3868                 /*Configure general VMDQ and DCB TX parameters*/
3869                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3870                 break;
3871
3872         case ETH_MQ_TX_DCB:
3873                 dcb_config->vt_mode = false;
3874                 config_dcb_tx = DCB_TX_CONFIG;
3875                 /* Get DCB TX configuration parameters from rte_eth_conf */
3876                 ixgbe_dcb_tx_config(dev, dcb_config);
3877                 /* Configure general DCB TX parameters */
3878                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3879                 break;
3880         default:
3881                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3882                 break;
3883         }
3884
3885         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3886         /* Unpack map */
3887         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3888         if (nb_tcs == ETH_4_TCS) {
3889                 /* Avoid un-configured priority mapping to TC0 */
3890                 uint8_t j = 4;
3891                 uint8_t mask = 0xFF;
3892
3893                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3894                         mask = (uint8_t)(mask & (~(1 << map[i])));
3895                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3896                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3897                                 map[j++] = i;
3898                         mask >>= 1;
3899                 }
3900                 /* Re-configure 4 TCs BW */
3901                 for (i = 0; i < nb_tcs; i++) {
3902                         tc = &dcb_config->tc_config[i];
3903                         if (bw_conf->tc_num != nb_tcs)
3904                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3905                                         (uint8_t)(100 / nb_tcs);
3906                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3907                                                 (uint8_t)(100 / nb_tcs);
3908                 }
3909                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3910                         tc = &dcb_config->tc_config[i];
3911                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3912                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3913                 }
3914         } else {
3915                 /* Re-configure 8 TCs BW */
3916                 for (i = 0; i < nb_tcs; i++) {
3917                         tc = &dcb_config->tc_config[i];
3918                         if (bw_conf->tc_num != nb_tcs)
3919                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3920                                         (uint8_t)(100 / nb_tcs + (i & 1));
3921                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3922                                 (uint8_t)(100 / nb_tcs + (i & 1));
3923                 }
3924         }
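        /*
         * Illustration of the split above: with nb_tcs == 8 the expression
         * 100 / nb_tcs + (i & 1) yields 12, 13, 12, 13, ... which sums to
         * exactly 100, while with nb_tcs == 4 each TC simply gets 25 percent.
         */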
3925
3926         switch (hw->mac.type) {
3927         case ixgbe_mac_X550:
3928         case ixgbe_mac_X550EM_x:
3929         case ixgbe_mac_X550EM_a:
3930                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3931                 break;
3932         default:
3933                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3934                 break;
3935         }
3936
3937         if (config_dcb_rx) {
3938                 /* Set RX buffer size */
3939                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3940                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3941
3942                 for (i = 0; i < nb_tcs; i++) {
3943                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3944                 }
3945                 /* zero alloc all unused TCs */
3946                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3947                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3948                 }
3949         }
3950         if (config_dcb_tx) {
3951                 /* Only an equally distributed Tx packet buffer
3952                  * strategy is supported.
3953                  */
3954                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3955                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3956
3957                 for (i = 0; i < nb_tcs; i++) {
3958                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3959                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3960                 }
3961                 /* Clear unused TCs, if any, to zero buffer size */
3962                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3963                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3964                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3965                 }
3966         }
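        /*
         * The Tx packet buffer is split evenly above: each of the nb_tcs
         * classes gets IXGBE_TXPBSIZE_MAX / nb_tcs bytes, and the almost-full
         * threshold is that size expressed in DCB_TX_PB (1 KB) units minus
         * IXGBE_TXPKT_SIZE_MAX, effectively keeping headroom for one
         * maximum-sized packet.
         */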
3967
3968         /* Calculate traffic class credits */
3969         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3970                                 IXGBE_DCB_TX_CONFIG);
3971         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3972                                 IXGBE_DCB_RX_CONFIG);
3973
3974         if (config_dcb_rx) {
3975                 /* Unpack CEE standard containers */
3976                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3977                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3978                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3979                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3980                 /* Configure PG(ETS) RX */
3981                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3982         }
3983
3984         if (config_dcb_tx) {
3985                 /* Unpack CEE standard containers */
3986                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3987                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3988                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3989                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3990                 /* Configure PG(ETS) TX */
3991                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3992         }
3993
3994         /* Configure queue statistics registers */
3995         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3996
3997         /* Check if the PFC is supported */
3998         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3999                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4000                 for (i = 0; i < nb_tcs; i++) {
4001                         /*
4002                          * If the TC count is 8, the default high_water is 48
4003                          * and the default low_water is 16.
4004                         */
4005                         hw->fc.high_water[i] = (pbsize * 3) / 4;
4006                         hw->fc.low_water[i] = pbsize / 4;
4007                         /* Enable pfc for this TC */
4008                         tc = &dcb_config->tc_config[i];
4009                         tc->pfc = ixgbe_dcb_pfc_enabled;
4010                 }
4011                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4012                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4013                         pfc_en &= 0x0F;
4014                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4015         }
4016
4017         return ret;
4018 }
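/*
 * A minimal application-side sketch of the rte_eth_conf settings that
 * typically lead into ixgbe_dcb_hw_configure() above (assuming the public
 * ethdev structures of this release):
 *
 *      struct rte_eth_conf conf = { 0 };
 *
 *      conf.rxmode.mq_mode = ETH_MQ_RX_DCB;
 *      conf.txmode.mq_mode = ETH_MQ_TX_DCB;
 *      conf.rx_adv_conf.dcb_rx_conf.nb_tcs = ETH_4_TCS;
 *      conf.tx_adv_conf.dcb_tx_conf.nb_tcs = ETH_4_TCS;
 *      conf.dcb_capability_en = ETH_DCB_PFC_SUPPORT;
 *
 *      rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *      rte_eth_dev_start(port_id);
 *
 * The dcb_tc[] arrays in dcb_rx_conf/dcb_tx_conf map user priorities to
 * TCs; if left zeroed, every priority maps to TC0 and the re-mapping
 * logic above spreads the unconfigured ones across the remaining TCs.
 */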
4019
4020 /**
4021  * ixgbe_configure_dcb - Configure DCB hardware
4022  * @dev: pointer to rte_eth_dev
4023  */
4024 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4025 {
4026         struct ixgbe_dcb_config *dcb_cfg =
4027                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4028         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4029
4030         PMD_INIT_FUNC_TRACE();
4031
4032         /* check support mq_mode for DCB */
4033         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4034             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4035             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4036                 return;
4037
4038         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4039                 return;
4040
4041         /** Configure DCB hardware **/
4042         ixgbe_dcb_hw_configure(dev, dcb_cfg);
4043 }
4044
4045 /*
4046  * VMDq is only supported on 10 GbE NICs.
4047  */
4048 static void
4049 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4050 {
4051         struct rte_eth_vmdq_rx_conf *cfg;
4052         struct ixgbe_hw *hw;
4053         enum rte_eth_nb_pools num_pools;
4054         uint32_t mrqc, vt_ctl, vlanctrl;
4055         uint32_t vmolr = 0;
4056         int i;
4057
4058         PMD_INIT_FUNC_TRACE();
4059         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4060         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4061         num_pools = cfg->nb_queue_pools;
4062
4063         ixgbe_rss_disable(dev);
4064
4065         /* MRQC: enable vmdq */
4066         mrqc = IXGBE_MRQC_VMDQEN;
4067         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4068
4069         /* PFVTCTL: turn on virtualisation and set the default pool */
4070         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4071         if (cfg->enable_default_pool)
4072                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4073         else
4074                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4075
4076         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4077
4078         for (i = 0; i < (int)num_pools; i++) {
4079                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4080                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4081         }
4082
4083         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4084         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4085         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4086         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4087
4088         /* VFTA - enable all vlan filters */
4089         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4090                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4091
4092         /* VFRE: enable receive for all pools (up to 64) */
4093         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4094         if (num_pools == ETH_64_POOLS)
4095                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4096
4097         /*
4098          * MPSAR - allow pools to read specific mac addresses
4099          * In this case, all pools should be able to read from mac addr 0
4100          */
4101         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4102         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4103
4104         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4105         for (i = 0; i < cfg->nb_pool_maps; i++) {
4106                 /* set vlan id in VF register and set the valid bit */
4107                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4108                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4109                 /*
4110                  * Put the allowed pools into the VLVFB register pair:
4111                  * pools 0-31 go into VLVFB(2 * i) and pools 32-63 into
4112                  * VLVFB(2 * i + 1).
4113                  */
4114                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4115                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4116                                         (cfg->pool_map[i].pools & UINT32_MAX));
4117                 else
4118                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4119                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4120
4121         }
4122
4123         /* PFDMA Tx General Switch Control: enable VMDq loopback */
4124         if (cfg->enable_loop_back) {
4125                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4126                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4127                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4128         }
4129
4130         IXGBE_WRITE_FLUSH(hw);
4131 }
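/*
 * A minimal application-side sketch of the settings that drive this VMDq
 * RX path (assuming struct rte_eth_vmdq_rx_conf from the public ethdev API
 * of this release):
 *
 *      struct rte_eth_conf conf = { 0 };
 *
 *      conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;
 *      conf.rx_adv_conf.vmdq_rx_conf.nb_queue_pools = ETH_64_POOLS;
 *      conf.rx_adv_conf.vmdq_rx_conf.enable_default_pool = 0;
 *      conf.rx_adv_conf.vmdq_rx_conf.nb_pool_maps = 1;
 *      conf.rx_adv_conf.vmdq_rx_conf.pool_map[0].vlan_id = 100;
 *      conf.rx_adv_conf.vmdq_rx_conf.pool_map[0].pools = 1ULL << 0;
 *
 *      rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * Each pool_map[] entry steers one VLAN ID to the pools selected by its
 * 64-bit "pools" mask, which is what the PFVLVF/PFVLVFB loop above
 * programs into the hardware.
 */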
4132
4133 /*
4134  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4135  * @hw: pointer to hardware structure
4136  */
4137 static void
4138 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4139 {
4140         uint32_t reg;
4141         uint32_t q;
4142
4143         PMD_INIT_FUNC_TRACE();
4144         /* PF VF Transmit Enable */
4145         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4146         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4147
4148         /* Disable the Tx desc arbiter so that MTQC can be changed */
4149         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4150         reg |= IXGBE_RTTDCS_ARBDIS;
4151         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4152
4153         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4154         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4155
4156         /* Disable drop for all queues */
4157         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4158                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4159                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4160
4161         /* Enable the Tx desc arbiter */
4162         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4163         reg &= ~IXGBE_RTTDCS_ARBDIS;
4164         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4165
4166         IXGBE_WRITE_FLUSH(hw);
4167 }
4168
4169 static int __attribute__((cold))
4170 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4171 {
4172         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4173         uint64_t dma_addr;
4174         unsigned int i;
4175
4176         /* Initialize software ring entries */
4177         for (i = 0; i < rxq->nb_rx_desc; i++) {
4178                 volatile union ixgbe_adv_rx_desc *rxd;
4179                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4180
4181                 if (mbuf == NULL) {
4182                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4183                                      (unsigned) rxq->queue_id);
4184                         return -ENOMEM;
4185                 }
4186
4187                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4188                 mbuf->port = rxq->port_id;
4189
4190                 dma_addr =
4191                         rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4192                 rxd = &rxq->rx_ring[i];
4193                 rxd->read.hdr_addr = 0;
4194                 rxd->read.pkt_addr = dma_addr;
4195                 rxe[i].mbuf = mbuf;
4196         }
4197
4198         return 0;
4199 }
4200
4201 static int
4202 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4203 {
4204         struct ixgbe_hw *hw;
4205         uint32_t mrqc;
4206
4207         ixgbe_rss_configure(dev);
4208
4209         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4210
4211         /* MRQC: enable VF RSS */
4212         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4213         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4214         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4215         case ETH_64_POOLS:
4216                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4217                 break;
4218
4219         case ETH_32_POOLS:
4220                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4221                 break;
4222
4223         default:
4224                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4225                 return -EINVAL;
4226         }
4227
4228         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4229
4230         return 0;
4231 }
4232
4233 static int
4234 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4235 {
4236         struct ixgbe_hw *hw =
4237                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4238
4239         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4240         case ETH_64_POOLS:
4241                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4242                         IXGBE_MRQC_VMDQEN);
4243                 break;
4244
4245         case ETH_32_POOLS:
4246                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4247                         IXGBE_MRQC_VMDQRT4TCEN);
4248                 break;
4249
4250         case ETH_16_POOLS:
4251                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4252                         IXGBE_MRQC_VMDQRT8TCEN);
4253                 break;
4254         default:
4255                 PMD_INIT_LOG(ERR,
4256                         "invalid pool number in IOV mode");
4257                 break;
4258         }
4259         return 0;
4260 }
4261
4262 static int
4263 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4264 {
4265         struct ixgbe_hw *hw =
4266                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4267
4268         if (hw->mac.type == ixgbe_mac_82598EB)
4269                 return 0;
4270
4271         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4272                 /*
4273                  * SRIOV inactive scheme
4274                  * any DCB/RSS w/o VMDq multi-queue setting
4275                  */
4276                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4277                 case ETH_MQ_RX_RSS:
4278                 case ETH_MQ_RX_DCB_RSS:
4279                 case ETH_MQ_RX_VMDQ_RSS:
4280                         ixgbe_rss_configure(dev);
4281                         break;
4282
4283                 case ETH_MQ_RX_VMDQ_DCB:
4284                         ixgbe_vmdq_dcb_configure(dev);
4285                         break;
4286
4287                 case ETH_MQ_RX_VMDQ_ONLY:
4288                         ixgbe_vmdq_rx_hw_configure(dev);
4289                         break;
4290
4291                 case ETH_MQ_RX_NONE:
4292                 default:
4293                         /* if mq_mode is none, disable rss mode.*/
4294                         ixgbe_rss_disable(dev);
4295                         break;
4296                 }
4297         } else {
4298                 /* SRIOV active scheme
4299                  * Support RSS together with SRIOV.
4300                  */
4301                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4302                 case ETH_MQ_RX_RSS:
4303                 case ETH_MQ_RX_VMDQ_RSS:
4304                         ixgbe_config_vf_rss(dev);
4305                         break;
4306                 case ETH_MQ_RX_VMDQ_DCB:
4307                 case ETH_MQ_RX_DCB:
4308                 /* In SRIOV, the configuration is the same as VMDq case */
4309                         ixgbe_vmdq_dcb_configure(dev);
4310                         break;
4311                 /* DCB/RSS together with SRIOV is not supported */
4312                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4313                 case ETH_MQ_RX_DCB_RSS:
4314                         PMD_INIT_LOG(ERR,
4315                                 "DCB/RSS is not supported with VMDq & SRIOV");
4316                         return -1;
4317                 default:
4318                         ixgbe_config_vf_default(dev);
4319                         break;
4320                 }
4321         }
4322
4323         return 0;
4324 }
4325
4326 static int
4327 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4328 {
4329         struct ixgbe_hw *hw =
4330                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4331         uint32_t mtqc;
4332         uint32_t rttdcs;
4333
4334         if (hw->mac.type == ixgbe_mac_82598EB)
4335                 return 0;
4336
4337         /* disable arbiter before setting MTQC */
4338         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4339         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4340         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4341
4342         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4343                 /*
4344                  * SRIOV inactive scheme
4345                  * any DCB w/o VMDq multi-queue setting
4346                  */
4347                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4348                         ixgbe_vmdq_tx_hw_configure(hw);
4349                 else {
4350                         mtqc = IXGBE_MTQC_64Q_1PB;
4351                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4352                 }
4353         } else {
4354                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4355
4356                 /*
4357                  * SRIOV active scheme
4358                  * FIXME: add support for DCB together with VMDq & SRIOV
4359                  */
4360                 case ETH_64_POOLS:
4361                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4362                         break;
4363                 case ETH_32_POOLS:
4364                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4365                         break;
4366                 case ETH_16_POOLS:
4367                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4368                                 IXGBE_MTQC_8TC_8TQ;
4369                         break;
4370                 default:
4371                         mtqc = IXGBE_MTQC_64Q_1PB;
4372                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4373                 }
4374                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4375         }
4376
4377         /* re-enable arbiter */
4378         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4379         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4380
4381         return 0;
4382 }
4383
4384 /**
4385  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4386  *
4387  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4388  * spec rev. 3.0 chapter 8.2.3.8.13.
4389  *
4390  * @pool Memory pool of the Rx queue
4391  */
4392 static inline uint32_t
4393 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4394 {
4395         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4396
4397         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4398         uint16_t maxdesc =
4399                 IPV4_MAX_PKT_LEN /
4400                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4401
4402         if (maxdesc >= 16)
4403                 return IXGBE_RSCCTL_MAXDESC_16;
4404         else if (maxdesc >= 8)
4405                 return IXGBE_RSCCTL_MAXDESC_8;
4406         else if (maxdesc >= 4)
4407                 return IXGBE_RSCCTL_MAXDESC_4;
4408         else
4409                 return IXGBE_RSCCTL_MAXDESC_1;
4410 }
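/*
 * Illustrative example, assuming the default mbuf data room
 * (RTE_MBUF_DEFAULT_BUF_SIZE, i.e. 2048 bytes plus RTE_PKTMBUF_HEADROOM):
 * the usable buffer is 2048 bytes, so maxdesc = 65535 / 2048 = 31 and
 * IXGBE_RSCCTL_MAXDESC_16 is returned.
 */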
4411
4412 /**
4413  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4414  * interrupt
4415  *
4416  * (Taken from FreeBSD tree)
4417  * (yes this is all very magic and confusing :)
4418  *
4419  * @dev port handle
4420  * @entry the register array entry
4421  * @vector the MSIX vector for this queue
4422  * @type RX/TX/MISC
4423  */
4424 static void
4425 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4426 {
4427         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4428         u32 ivar, index;
4429
4430         vector |= IXGBE_IVAR_ALLOC_VAL;
4431
4432         switch (hw->mac.type) {
4433
4434         case ixgbe_mac_82598EB:
4435                 if (type == -1)
4436                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4437                 else
4438                         entry += (type * 64);
4439                 index = (entry >> 2) & 0x1F;
4440                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4441                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4442                 ivar |= (vector << (8 * (entry & 0x3)));
4443                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4444                 break;
4445
4446         case ixgbe_mac_82599EB:
4447         case ixgbe_mac_X540:
4448                 if (type == -1) { /* MISC IVAR */
4449                         index = (entry & 1) * 8;
4450                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4451                         ivar &= ~(0xFF << index);
4452                         ivar |= (vector << index);
4453                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4454                 } else {        /* RX/TX IVARS */
4455                         index = (16 * (entry & 1)) + (8 * type);
4456                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4457                         ivar &= ~(0xFF << index);
4458                         ivar |= (vector << index);
4459                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4460                 }
4461
4462                 break;
4463
4464         default:
4465                 break;
4466         }
4467 }
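/*
 * Worked example for the 82599/X540 branch above: mapping RX queue
 * reg_idx 5 (entry = 5, type = 0) to MSI-X vector 3 selects register
 * IVAR(5 >> 1) = IVAR(2) and byte offset 16 * (5 & 1) + 8 * 0 = 16,
 * i.e. bits 23:16 receive the vector number with IXGBE_IVAR_ALLOC_VAL set.
 */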
4468
4469 void __attribute__((cold))
4470 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4471 {
4472         uint16_t i, rx_using_sse;
4473         struct ixgbe_adapter *adapter =
4474                 (struct ixgbe_adapter *)dev->data->dev_private;
4475
4476         /*
4477          * Vector Rx can only be used when a few configuration
4478          * conditions are met and Rx Bulk Allocation is allowed.
4479          */
4480         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4481             !adapter->rx_bulk_alloc_allowed) {
4482                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4483                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4484                                     "not enabled",
4485                              dev->data->port_id);
4486
4487                 adapter->rx_vec_allowed = false;
4488         }
4489
4490         /*
4491          * Initialize the appropriate LRO callback.
4492          *
4493          * If all queues satisfy the bulk allocation preconditions
4494          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4495          * Otherwise use a single allocation version.
4496          */
4497         if (dev->data->lro) {
4498                 if (adapter->rx_bulk_alloc_allowed) {
4499                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4500                                            "allocation version");
4501                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4502                 } else {
4503                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4504                                            "allocation version");
4505                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4506                 }
4507         } else if (dev->data->scattered_rx) {
4508                 /*
4509                  * Set the non-LRO scattered callback: there are Vector and
4510                  * single allocation versions.
4511                  */
4512                 if (adapter->rx_vec_allowed) {
4513                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4514                                             "callback (port=%d).",
4515                                      dev->data->port_id);
4516
4517                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4518                 } else if (adapter->rx_bulk_alloc_allowed) {
4519                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx callback with "
4520                                            "bulk allocation (port=%d).",
4521                                      dev->data->port_id);
4522                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4523                 } else {
4524                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4525                                             "single allocation) "
4526                                             "Scattered Rx callback "
4527                                             "(port=%d).",
4528                                      dev->data->port_id);
4529
4530                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4531                 }
4532         /*
4533          * Below we set "simple" callbacks according to port/queue parameters.
4534          * If parameters allow we are going to choose between the following
4535          * callbacks:
4536          *    - Vector
4537          *    - Bulk Allocation
4538          *    - Single buffer allocation (the simplest one)
4539          */
4540         } else if (adapter->rx_vec_allowed) {
4541                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure the RX "
4542                                     "burst size is no less than %d (port=%d).",
4543                              RTE_IXGBE_DESCS_PER_LOOP,
4544                              dev->data->port_id);
4545
4546                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4547         } else if (adapter->rx_bulk_alloc_allowed) {
4548                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4549                                     "satisfied. Rx Burst Bulk Alloc function "
4550                                     "will be used on port=%d.",
4551                              dev->data->port_id);
4552
4553                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4554         } else {
4555                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4556                                     "satisfied, or Scattered Rx is requested "
4557                                     "(port=%d).",
4558                              dev->data->port_id);
4559
4560                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4561         }
4562
4563         /* Propagate information about RX function choice through all queues. */
4564
4565         rx_using_sse =
4566                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4567                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4568
4569         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4570                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4571
4572                 rxq->rx_using_sse = rx_using_sse;
4573 #ifdef RTE_LIBRTE_SECURITY
4574                 rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4575                                 DEV_RX_OFFLOAD_SECURITY);
4576 #endif
4577         }
4578 }
4579
4580 /**
4581  * ixgbe_set_rsc - configure RSC related port HW registers
4582  *
4583  * Configures the port's RSC related registers according to chapter 4.6.7.2
4584  * of the 82599 Spec (x540 configuration is virtually the same).
4585  *
4586  * @dev port handle
4587  *
4588  * Returns 0 in case of success or a non-zero error code
4589  */
4590 static int
4591 ixgbe_set_rsc(struct rte_eth_dev *dev)
4592 {
4593         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4594         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4595         struct rte_eth_dev_info dev_info = { 0 };
4596         bool rsc_capable = false;
4597         uint16_t i;
4598         uint32_t rdrxctl;
4599         uint32_t rfctl;
4600
4601         /* Sanity check */
4602         dev->dev_ops->dev_infos_get(dev, &dev_info);
4603         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4604                 rsc_capable = true;
4605
4606         if (!rsc_capable && rx_conf->enable_lro) {
4607                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4608                                    "support it");
4609                 return -EINVAL;
4610         }
4611
4612         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4613
4614         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4615                 /*
4616                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4617                  * 3.0, RSC requires HW CRC stripping to be
4618                  * enabled. If the user requested both HW CRC stripping
4619                  * off and RSC on, return an error.
4620                  */
4621                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4622                                     "is disabled");
4623                 return -EINVAL;
4624         }
4625
4626         /* RFCTL configuration  */
4627         rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4628         if ((rsc_capable) && (rx_conf->enable_lro))
4629                 /*
4630                  * Since coalescing of NFS packets is not supported, clear
4631                  * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4632                  * enabled.
4633                  */
4634                 rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4635                            IXGBE_RFCTL_NFSR_DIS);
4636         else
4637                 rfctl |= IXGBE_RFCTL_RSC_DIS;
4638         IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4639
4640         /* If LRO hasn't been requested - we are done here. */
4641         if (!rx_conf->enable_lro)
4642                 return 0;
4643
4644         /* Set RDRXCTL.RSCACKC bit */
4645         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4646         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4647         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4648
4649         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4650         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4651                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4652                 uint32_t srrctl =
4653                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4654                 uint32_t rscctl =
4655                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4656                 uint32_t psrtype =
4657                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4658                 uint32_t eitr =
4659                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4660
4661                 /*
4662                  * ixgbe PMD doesn't support header-split at the moment.
4663                  *
4664                  * Following chapter 4.6.7.2.1 of the 82599/x540
4665                  * Spec, if RSC is enabled the SRRCTL[n].BSIZEHEADER
4666                  * should be configured even if header split is not
4667                  * enabled. We configure it to 128 bytes, following the
4668                  * recommendation in the spec.
4669                  */
4670                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4671                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4672                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4673
4674                 /*
4675                  * TODO: Consider setting the Receive Descriptor Minimum
4676                  * Threshold Size for an RSC case. This is not an obviously
4677                  * beneficial option, but it is worth considering...
4678                  */
4679
4680                 rscctl |= IXGBE_RSCCTL_RSCEN;
4681                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4682                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4683
4684                 /*
4685                  * RSC: Set ITR interval corresponding to 2K ints/s.
4686                  *
4687                  * Full-sized RSC aggregations for a 10Gb/s link will
4688                  * arrive at a rate of about 20K aggregations/s.
4689                  *
4690                  * A 2K ints/s rate will cause only 10% of the
4691                  * aggregations to be closed due to the interrupt timer
4692                  * expiring when streaming at wire speed.
4693                  *
4694                  * For a sparse streaming case this setting will yield
4695                  * at most 500us latency for a single RSC aggregation.
4696                  */
4697                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4698                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4699
4700                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4701                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4702                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4703                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4704
4705                 /*
4706                  * RSC requires the mapping of the queue to the
4707                  * interrupt vector.
4708                  */
4709                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4710         }
4711
4712         dev->data->lro = 1;
4713
4714         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4715
4716         return 0;
4717 }
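/*
 * A minimal sketch of the application-side request that enables the RSC
 * path above (field names from struct rte_eth_rxmode in this release):
 *
 *      struct rte_eth_conf conf = { 0 };
 *
 *      conf.rxmode.enable_lro = 1;      (request LRO/RSC)
 *      conf.rxmode.hw_strip_crc = 1;    (mandatory, see the check above)
 *
 *      rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * ixgbe_set_rsc() is then called from ixgbe_dev_rx_init() at start time
 * and rejects the configuration if the hardware lacks TCP LRO support.
 */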
4718
4719 /*
4720  * Initializes Receive Unit.
4721  */
4722 int __attribute__((cold))
4723 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4724 {
4725         struct ixgbe_hw     *hw;
4726         struct ixgbe_rx_queue *rxq;
4727         uint64_t bus_addr;
4728         uint32_t rxctrl;
4729         uint32_t fctrl;
4730         uint32_t hlreg0;
4731         uint32_t maxfrs;
4732         uint32_t srrctl;
4733         uint32_t rdrxctl;
4734         uint32_t rxcsum;
4735         uint16_t buf_size;
4736         uint16_t i;
4737         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4738         int rc;
4739
4740         PMD_INIT_FUNC_TRACE();
4741         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4742
4743         /*
4744          * Make sure receives are disabled while setting
4745          * up the RX context (registers, descriptor rings, etc.).
4746          */
4747         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4748         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4749
4750         /* Enable receipt of broadcast frames */
4751         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4752         fctrl |= IXGBE_FCTRL_BAM;
4753         fctrl |= IXGBE_FCTRL_DPF;
4754         fctrl |= IXGBE_FCTRL_PMCF;
4755         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4756
4757         /*
4758          * Configure CRC stripping, if any.
4759          */
4760         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4761         if (rx_conf->hw_strip_crc)
4762                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4763         else
4764                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4765
4766         /*
4767          * Configure jumbo frame support, if any.
4768          */
4769         if (rx_conf->jumbo_frame == 1) {
4770                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4771                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4772                 maxfrs &= 0x0000FFFF;
4773                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4774                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4775         } else
4776                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4777
4778         /*
4779          * If loopback mode is configured for 82599, set LPBK bit.
4780          */
4781         if (hw->mac.type == ixgbe_mac_82599EB &&
4782                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4783                 hlreg0 |= IXGBE_HLREG0_LPBK;
4784         else
4785                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4786
4787         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4788
4789         /* Setup RX queues */
4790         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4791                 rxq = dev->data->rx_queues[i];
4792
4793                 /*
4794                  * Reset crc_len in case it was changed after queue setup by a
4795                  * call to configure.
4796                  */
4797                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4798
4799                 /* Setup the Base and Length of the Rx Descriptor Rings */
4800                 bus_addr = rxq->rx_ring_phys_addr;
4801                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4802                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4803                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4804                                 (uint32_t)(bus_addr >> 32));
4805                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4806                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4807                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4808                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4809
4810                 /* Configure the SRRCTL register */
4811 #ifdef RTE_HEADER_SPLIT_ENABLE
4812                 /*
4813                  * Configure Header Split
4814                  */
4815                 if (rx_conf->header_split) {
4816                         if (hw->mac.type == ixgbe_mac_82599EB) {
4817                                 /* Must setup the PSRTYPE register */
4818                                 uint32_t psrtype;
4819
4820                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4821                                         IXGBE_PSRTYPE_UDPHDR   |
4822                                         IXGBE_PSRTYPE_IPV4HDR  |
4823                                         IXGBE_PSRTYPE_IPV6HDR;
4824                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4825                         }
4826                         srrctl = ((rx_conf->split_hdr_size <<
4827                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4828                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4829                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4830                 } else
4831 #endif
4832                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4833
4834                 /* Set whether packets are dropped when no descriptors are available */
4835                 if (rxq->drop_en)
4836                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4837
4838                 /*
4839                  * Configure the RX buffer size in the BSIZEPACKET field of
4840                  * the SRRCTL register of the queue.
4841                  * The value is in 1 KB resolution. Valid values can be from
4842                  * 1 KB to 16 KB.
4843                  */
4844                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4845                         RTE_PKTMBUF_HEADROOM);
4846                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4847                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4848
4849                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4850
4851                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4852                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4853
4854                 /* Add dual VLAN tag length to account for double VLAN frames */
4855                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4856                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4857                         dev->data->scattered_rx = 1;
4858         }
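        /*
         * Note on the check above: buf_size is re-derived from SRRCTL, so it
         * is effectively rounded down to 1 KB granularity; scattered RX is
         * enabled whenever the maximum frame plus room for two VLAN tags
         * (2 * IXGBE_VLAN_TAG_SIZE bytes) no longer fits in one such buffer.
         */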
4859
4860         if (rx_conf->enable_scatter)
4861                 dev->data->scattered_rx = 1;
4862
4863         /*
4864          * Device configured with multiple RX queues.
4865          */
4866         ixgbe_dev_mq_rx_configure(dev);
4867
4868         /*
4869          * Setup the Checksum Register.
4870          * Disable Full-Packet Checksum, which is mutually exclusive with RSS.
4871          * Enable IP/L4 checksum computation by hardware if requested to do so.
4872          */
4873         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4874         rxcsum |= IXGBE_RXCSUM_PCSD;
4875         if (rx_conf->hw_ip_checksum)
4876                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4877         else
4878                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4879
4880         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4881
4882         if (hw->mac.type == ixgbe_mac_82599EB ||
4883             hw->mac.type == ixgbe_mac_X540) {
4884                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4885                 if (rx_conf->hw_strip_crc)
4886                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4887                 else
4888                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4889                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4890                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4891         }
4892
4893         rc = ixgbe_set_rsc(dev);
4894         if (rc)
4895                 return rc;
4896
4897         ixgbe_set_rx_function(dev);
4898
4899         return 0;
4900 }
4901
4902 /*
4903  * Initializes Transmit Unit.
4904  */
4905 void __attribute__((cold))
4906 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4907 {
4908         struct ixgbe_hw     *hw;
4909         struct ixgbe_tx_queue *txq;
4910         uint64_t bus_addr;
4911         uint32_t hlreg0;
4912         uint32_t txctrl;
4913         uint16_t i;
4914
4915         PMD_INIT_FUNC_TRACE();
4916         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4917
4918         /* Enable TX CRC (checksum offload requirement) and hw padding
4919          * (TSO requirement)
4920          */
4921         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4922         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4923         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4924
4925         /* Setup the Base and Length of the Tx Descriptor Rings */
4926         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4927                 txq = dev->data->tx_queues[i];
4928
4929                 bus_addr = txq->tx_ring_phys_addr;
4930                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4931                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4932                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4933                                 (uint32_t)(bus_addr >> 32));
4934                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4935                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4936                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4937                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4938                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4939
4940                 /*
4941                  * Disable Tx Head Writeback RO bit, since this hoses
4942                  * bookkeeping if things aren't delivered in order.
4943                  */
4944                 switch (hw->mac.type) {
4945                 case ixgbe_mac_82598EB:
4946                         txctrl = IXGBE_READ_REG(hw,
4947                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4948                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4949                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4950                                         txctrl);
4951                         break;
4952
4953                 case ixgbe_mac_82599EB:
4954                 case ixgbe_mac_X540:
4955                 case ixgbe_mac_X550:
4956                 case ixgbe_mac_X550EM_x:
4957                 case ixgbe_mac_X550EM_a:
4958                 default:
4959                         txctrl = IXGBE_READ_REG(hw,
4960                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4961                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4962                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4963                                         txctrl);
4964                         break;
4965                 }
4966         }
4967
4968         /* Device configured with multiple TX queues. */
4969         ixgbe_dev_mq_tx_configure(dev);
4970 }
4971
4972 /*
4973  * Set up link for 82599 loopback mode Tx->Rx.
4974  */
4975 static inline void __attribute__((cold))
4976 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4977 {
4978         PMD_INIT_FUNC_TRACE();
4979
4980         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4981                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4982                                 IXGBE_SUCCESS) {
4983                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4984                         /* ignore error */
4985                         return;
4986                 }
4987         }
4988
4989         /* Restart link */
4990         IXGBE_WRITE_REG(hw,
4991                         IXGBE_AUTOC,
4992                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4993         ixgbe_reset_pipeline_82599(hw);
4994
4995         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4996         msec_delay(50);
4997 }
4998
4999
5000 /*
5001  * Start Transmit and Receive Units.
5002  */
5003 int __attribute__((cold))
5004 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5005 {
5006         struct ixgbe_hw     *hw;
5007         struct ixgbe_tx_queue *txq;
5008         struct ixgbe_rx_queue *rxq;
5009         uint32_t txdctl;
5010         uint32_t dmatxctl;
5011         uint32_t rxctrl;
5012         uint16_t i;
5013         int ret = 0;
5014
5015         PMD_INIT_FUNC_TRACE();
5016         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5017
5018         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5019                 txq = dev->data->tx_queues[i];
5020                 /* Setup Transmit Threshold Registers */
5021                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5022                 txdctl |= txq->pthresh & 0x7F;
5023                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5024                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5025                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5026         }
5027
5028         if (hw->mac.type != ixgbe_mac_82598EB) {
5029                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5030                 dmatxctl |= IXGBE_DMATXCTL_TE;
5031                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5032         }
5033
5034         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5035                 txq = dev->data->tx_queues[i];
5036                 if (!txq->tx_deferred_start) {
5037                         ret = ixgbe_dev_tx_queue_start(dev, i);
5038                         if (ret < 0)
5039                                 return ret;
5040                 }
5041         }
5042
5043         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5044                 rxq = dev->data->rx_queues[i];
5045                 if (!rxq->rx_deferred_start) {
5046                         ret = ixgbe_dev_rx_queue_start(dev, i);
5047                         if (ret < 0)
5048                                 return ret;
5049                 }
5050         }
5051
5052         /* Enable Receive engine */
5053         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5054         if (hw->mac.type == ixgbe_mac_82598EB)
5055                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
5056         rxctrl |= IXGBE_RXCTRL_RXEN;
5057         hw->mac.ops.enable_rx_dma(hw, rxctrl);
5058
5059         /* If loopback mode is enabled for 82599, set up the link accordingly */
5060         if (hw->mac.type == ixgbe_mac_82599EB &&
5061                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
5062                 ixgbe_setup_loopback_link_82599(hw);
5063
5064 #ifdef RTE_LIBRTE_SECURITY
5065         if ((dev->data->dev_conf.rxmode.offloads &
5066                         DEV_RX_OFFLOAD_SECURITY) ||
5067                 (dev->data->dev_conf.txmode.offloads &
5068                         DEV_TX_OFFLOAD_SECURITY)) {
5069                 ret = ixgbe_crypto_enable_ipsec(dev);
5070                 if (ret != 0) {
5071                         PMD_DRV_LOG(ERR,
5072                                     "ixgbe_crypto_enable_ipsec fails with %d.",
5073                                     ret);
5074                         return ret;
5075                 }
5076         }
5077 #endif
5078
5079         return 0;
5080 }
5081
5082 /*
5083  * Start Receive Units for specified queue.
5084  */
5085 int __attribute__((cold))
5086 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5087 {
5088         struct ixgbe_hw     *hw;
5089         struct ixgbe_rx_queue *rxq;
5090         uint32_t rxdctl;
5091         int poll_ms;
5092
5093         PMD_INIT_FUNC_TRACE();
5094         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5095
5096         if (rx_queue_id < dev->data->nb_rx_queues) {
5097                 rxq = dev->data->rx_queues[rx_queue_id];
5098
5099                 /* Allocate buffers for descriptor rings */
5100                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5101                         PMD_INIT_LOG(ERR, "Could not allocate mbufs for queue %d",
5102                                      rx_queue_id);
5103                         return -1;
5104                 }
5105                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5106                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5107                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5108
5109                 /* Wait until RX Enable ready */
5110                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5111                 do {
5112                         rte_delay_ms(1);
5113                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5114                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5115                 if (!poll_ms)
5116                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5117                                      rx_queue_id);
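                     /* Make ring updates visible before programming RDH/RDT */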
5118                 rte_wmb();
5119                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5120                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5121                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5122         } else
5123                 return -1;
5124
5125         return 0;
5126 }
5127
5128 /*
5129  * Stop the Receive Unit for the specified queue.
5130  */
5131 int __attribute__((cold))
5132 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5133 {
5134         struct ixgbe_hw     *hw;
5135         struct ixgbe_adapter *adapter =
5136                 (struct ixgbe_adapter *)dev->data->dev_private;
5137         struct ixgbe_rx_queue *rxq;
5138         uint32_t rxdctl;
5139         int poll_ms;
5140
5141         PMD_INIT_FUNC_TRACE();
5142         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5143
5144         if (rx_queue_id < dev->data->nb_rx_queues) {
5145                 rxq = dev->data->rx_queues[rx_queue_id];
5146
5147                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5148                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5149                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5150
5151                 /* Wait until RX Enable bit clear */
5152                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5153                 do {
5154                         rte_delay_ms(1);
5155                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5156                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5157                 if (!poll_ms)
5158                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5159                                      rx_queue_id);
5160
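                     /* Give pending ring accesses time to complete */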
5161                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5162
5163                 ixgbe_rx_queue_release_mbufs(rxq);
5164                 ixgbe_reset_rx_queue(adapter, rxq);
5165                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5166         } else
5167                 return -1;
5168
5169         return 0;
5170 }
5171
5172
5173 /*
5174  * Start the Transmit Unit for the specified queue.
5175  */
5176 int __attribute__((cold))
5177 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5178 {
5179         struct ixgbe_hw     *hw;
5180         struct ixgbe_tx_queue *txq;
5181         uint32_t txdctl;
5182         int poll_ms;
5183
5184         PMD_INIT_FUNC_TRACE();
5185         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5186
5187         if (tx_queue_id < dev->data->nb_tx_queues) {
5188                 txq = dev->data->tx_queues[tx_queue_id];
5189                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5190                 txdctl |= IXGBE_TXDCTL_ENABLE;
5191                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5192
5193                 /* Wait until TX Enable ready */
5194                 if (hw->mac.type == ixgbe_mac_82599EB) {
5195                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5196                         do {
5197                                 rte_delay_ms(1);
5198                                 txdctl = IXGBE_READ_REG(hw,
5199                                         IXGBE_TXDCTL(txq->reg_idx));
5200                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5201                         if (!poll_ms)
5202                                 PMD_INIT_LOG(ERR, "Could not enable "
5203                                              "Tx Queue %d", tx_queue_id);
5204                 }
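                     /* Order queue setup writes before resetting TDH/TDT */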
5205                 rte_wmb();
5206                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5207                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5208                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5209         } else
5210                 return -1;
5211
5212         return 0;
5213 }
5214
5215 /*
5216  * Stop the Transmit Unit for the specified queue.
5217  */
5218 int __attribute__((cold))
5219 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5220 {
5221         struct ixgbe_hw     *hw;
5222         struct ixgbe_tx_queue *txq;
5223         uint32_t txdctl;
5224         uint32_t txtdh, txtdt;
5225         int poll_ms;
5226
5227         PMD_INIT_FUNC_TRACE();
5228         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5229
5230         if (tx_queue_id >= dev->data->nb_tx_queues)
5231                 return -1;
5232
5233         txq = dev->data->tx_queues[tx_queue_id];
5234
5235         /* Wait until TX queue is empty */
5236         if (hw->mac.type == ixgbe_mac_82599EB) {
5237                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5238                 do {
5239                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5240                         txtdh = IXGBE_READ_REG(hw,
5241                                                IXGBE_TDH(txq->reg_idx));
5242                         txtdt = IXGBE_READ_REG(hw,
5243                                                IXGBE_TDT(txq->reg_idx));
5244                 } while (--poll_ms && (txtdh != txtdt));
5245                 if (!poll_ms)
5246                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5247                                      "when stopping.", tx_queue_id);
5248         }
5249
5250         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5251         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5252         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5253
5254         /* Wait until TX Enable bit clear */
5255         if (hw->mac.type == ixgbe_mac_82599EB) {
5256                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5257                 do {
5258                         rte_delay_ms(1);
5259                         txdctl = IXGBE_READ_REG(hw,
5260                                                 IXGBE_TXDCTL(txq->reg_idx));
5261                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5262                 if (!poll_ms)
5263                         PMD_INIT_LOG(ERR, "Could not disable "
5264                                      "Tx Queue %d", tx_queue_id);
5265         }
5266
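             /* Free mbufs held in the SW ring and reset the queue state */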
5267         if (txq->ops != NULL) {
5268                 txq->ops->release_mbufs(txq);
5269                 txq->ops->reset(txq);
5270         }
5271         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5272
5273         return 0;
5274 }
5275
5276 void
5277 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5278         struct rte_eth_rxq_info *qinfo)
5279 {
5280         struct ixgbe_rx_queue *rxq;
5281
5282         rxq = dev->data->rx_queues[queue_id];
5283
5284         qinfo->mp = rxq->mb_pool;
5285         qinfo->scattered_rx = dev->data->scattered_rx;
5286         qinfo->nb_desc = rxq->nb_rx_desc;
5287
5288         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5289         qinfo->conf.rx_drop_en = rxq->drop_en;
5290         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5291 }
5292
5293 void
5294 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5295         struct rte_eth_txq_info *qinfo)
5296 {
5297         struct ixgbe_tx_queue *txq;
5298
5299         txq = dev->data->tx_queues[queue_id];
5300
5301         qinfo->nb_desc = txq->nb_tx_desc;
5302
5303         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5304         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5305         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5306
5307         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5308         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5309         qinfo->conf.txq_flags = txq->txq_flags;
5310         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5311 }
5312
5313 /*
5314  * [VF] Initializes Receive Unit.
5315  */
5316 int __attribute__((cold))
5317 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5318 {
5319         struct ixgbe_hw     *hw;
5320         struct ixgbe_rx_queue *rxq;
5321         uint64_t bus_addr;
5322         uint32_t srrctl, psrtype = 0;
5323         uint16_t buf_size;
5324         uint16_t i;
5325         int ret;
5326
5327         PMD_INIT_FUNC_TRACE();
5328         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5329
5330         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5331                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5332                         "it must be a power of 2");
5333                 return -1;
5334         }
5335
5336         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5337                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5338                         "it must be less than or equal to %d",
5339                         hw->mac.max_rx_queues);
5340                 return -1;
5341         }
5342
5343         /*
5344          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5345          * disables VF packet reception if the PF MTU is > 1500.
5346          * This is done to cope with a limitation of the 82599 that forces
5347          * the PF and all VFs to share the same MTU.
5348          * The PF driver only re-enables VF packet reception once the VF
5349          * driver issues an IXGBE_VF_SET_LPE request.
5350          * In the meantime, the VF device cannot be used, even if the VF
5351          * driver and the Guest VM network stack are ready to accept packets
5352          * with a size up to the PF MTU.
5353          * As a work-around to this PF behaviour, force the call to
5354          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5355          * VF packet reception works in all cases.
5356          */
5357         ixgbevf_rlpml_set_vf(hw,
5358                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5359
5360         /* Setup RX queues */
5361         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5362                 rxq = dev->data->rx_queues[i];
5363
5364                 /* Allocate buffers for descriptor rings */
5365                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5366                 if (ret)
5367                         return ret;
5368
5369                 /* Setup the Base and Length of the Rx Descriptor Rings */
5370                 bus_addr = rxq->rx_ring_phys_addr;
5371
5372                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5373                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5374                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5375                                 (uint32_t)(bus_addr >> 32));
5376                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5377                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5378                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5379                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5380
5381
5382                 /* Configure the SRRCTL register */
5383 #ifdef RTE_HEADER_SPLIT_ENABLE
5384                 /*
5385                  * Configure Header Split
5386                  */
5387                 if (dev->data->dev_conf.rxmode.header_split) {
5388                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5389                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5390                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5391                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5392                 } else
5393 #endif
5394                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5395
5396                 /* Drop packets when no Rx descriptors are available, if enabled */
5397                 if (rxq->drop_en)
5398                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5399
5400                 /*
5401                  * Configure the RX buffer size in the BSIZEPACKET field of
5402                  * the SRRCTL register of the queue.
5403                  * The value is in 1 KB resolution. Valid values can be from
5404                  * 1 KB to 16 KB.
5405                  */
5406                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5407                         RTE_PKTMBUF_HEADROOM);
5408                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5409                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5410
5411                 /*
5412                  * VF path: the per-queue SRRCTL is programmed through VFSRRCTL.
5413                  */
5414                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5415
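                     /* Buffer size actually programmed, rounded to 1 KB units */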
5416                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5417                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5418
5419                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5420                     /* It adds dual VLAN length for supporting dual VLAN */
5421                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5422                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5423                         if (!dev->data->scattered_rx)
5424                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5425                         dev->data->scattered_rx = 1;
5426                 }
5427         }
5428
5429 #ifdef RTE_HEADER_SPLIT_ENABLE
5430         if (dev->data->dev_conf.rxmode.header_split)
5431                 /* Must setup the PSRTYPE register */
5432                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5433                         IXGBE_PSRTYPE_UDPHDR   |
5434                         IXGBE_PSRTYPE_IPV4HDR  |
5435                         IXGBE_PSRTYPE_IPV6HDR;
5436 #endif
5437
5438         /* Set RQPL for VF RSS according to the number of Rx queues */
5439         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5440                 IXGBE_PSRTYPE_RQPL_SHIFT;
5441         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5442
5443         ixgbe_set_rx_function(dev);
5444
5445         return 0;
5446 }
5447
5448 /*
5449  * [VF] Initializes Transmit Unit.
5450  */
5451 void __attribute__((cold))
5452 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5453 {
5454         struct ixgbe_hw     *hw;
5455         struct ixgbe_tx_queue *txq;
5456         uint64_t bus_addr;
5457         uint32_t txctrl;
5458         uint16_t i;
5459
5460         PMD_INIT_FUNC_TRACE();
5461         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5462
5463         /* Setup the Base and Length of the Tx Descriptor Rings */
5464         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5465                 txq = dev->data->tx_queues[i];
5466                 bus_addr = txq->tx_ring_phys_addr;
5467                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5468                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5469                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5470                                 (uint32_t)(bus_addr >> 32));
5471                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5472                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5473                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5474                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5475                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5476
5477                 /*
5478                  * Disable Tx Head Writeback RO bit, since this hoses
5479                  * Disable the Tx Head Writeback relaxed ordering (RO) bit:
5480                  * out-of-order write-backs break descriptor bookkeeping.
5481                 txctrl = IXGBE_READ_REG(hw,
5482                                 IXGBE_VFDCA_TXCTRL(i));
5483                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5484                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5485                                 txctrl);
5486         }
5487 }
5488
5489 /*
5490  * [VF] Start Transmit and Receive Units.
5491  */
5492 void __attribute__((cold))
5493 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5494 {
5495         struct ixgbe_hw     *hw;
5496         struct ixgbe_tx_queue *txq;
5497         struct ixgbe_rx_queue *rxq;
5498         uint32_t txdctl;
5499         uint32_t rxdctl;
5500         uint16_t i;
5501         int poll_ms;
5502
5503         PMD_INIT_FUNC_TRACE();
5504         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5505
5506         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5507                 txq = dev->data->tx_queues[i];
5508                 /* Setup Transmit Threshold Registers */
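                     /* PTHRESH [6:0], HTHRESH [14:8], WTHRESH [22:16] */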
5509                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5510                 txdctl |= txq->pthresh & 0x7F;
5511                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5512                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5513                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5514         }
5515
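             /* Enable every Tx queue and wait for the enable to latch */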
5516         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5517
5518                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5519                 txdctl |= IXGBE_TXDCTL_ENABLE;
5520                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5521
5522                 poll_ms = 10;
5523                 /* Wait until TX Enable ready */
5524                 do {
5525                         rte_delay_ms(1);
5526                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5527                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5528                 if (!poll_ms)
5529                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5530         }
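
             /* Enable every Rx queue and publish its tail pointer when ready */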
5531         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5532
5533                 rxq = dev->data->rx_queues[i];
5534
5535                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5536                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5537                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5538
5539                 /* Wait until RX Enable ready */
5540                 poll_ms = 10;
5541                 do {
5542                         rte_delay_ms(1);
5543                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5544                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5545                 if (!poll_ms)
5546                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5547                 rte_wmb();
5548                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5549
5550         }
5551 }
5552
5553 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5554 int __attribute__((weak))
5555 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5556 {
5557         return -1;
5558 }
5559
5560 uint16_t __attribute__((weak))
5561 ixgbe_recv_pkts_vec(
5562         void __rte_unused *rx_queue,
5563         struct rte_mbuf __rte_unused **rx_pkts,
5564         uint16_t __rte_unused nb_pkts)
5565 {
5566         return 0;
5567 }
5568
5569 uint16_t __attribute__((weak))
5570 ixgbe_recv_scattered_pkts_vec(
5571         void __rte_unused *rx_queue,
5572         struct rte_mbuf __rte_unused **rx_pkts,
5573         uint16_t __rte_unused nb_pkts)
5574 {
5575         return 0;
5576 }
5577
5578 int __attribute__((weak))
5579 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5580 {
5581         return -1;
5582 }