ethdev: move a queue id check to generic layer
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
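/* Offload flags outside IXGBE_TX_OFFLOAD_MASK are not supported by this PMD;
 * ixgbe_prep_pkts() uses the mask below to reject such packets.
 */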
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i, nb_free = 0;
130         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
131
132         /* check DD bit on threshold descriptor */
133         status = txq->tx_ring[txq->tx_next_dd].wb.status;
134         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
135                 return 0;
136
137         /*
138          * first buffer to free from S/W ring is at index
139          * tx_next_dd - (tx_rs_thresh-1)
140          */
141         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142
143         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
144                 /* free buffers one at a time */
145                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
146                 txep->mbuf = NULL;
147
148                 if (unlikely(m == NULL))
149                         continue;
150
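                /* Flush the local array when it is full or when the current
                 * mbuf comes from a different mempool, since a single
                 * rte_mempool_put_bulk() call can only return mbufs to one pool.
                 */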
151                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
152                     (nb_free > 0 && m->pool != free[0]->pool)) {
153                         rte_mempool_put_bulk(free[0]->pool,
154                                              (void **)free, nb_free);
155                         nb_free = 0;
156                 }
157
158                 free[nb_free++] = m;
159         }
160
161         if (nb_free > 0)
162                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
163
164         /* buffers were freed, update counters */
165         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
166         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
167         if (txq->tx_next_dd >= txq->nb_tx_desc)
168                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
169
170         return txq->tx_rs_thresh;
171 }
172
173 /* Populate 4 descriptors with data from 4 mbufs */
174 static inline void
175 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
176 {
177         uint64_t buf_dma_addr;
178         uint32_t pkt_len;
179         int i;
180
181         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
182                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
183                 pkt_len = (*pkts)->data_len;
184
185                 /* write data to descriptor */
186                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187
188                 txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190
191                 txdp->read.olinfo_status =
192                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
193
194                 rte_prefetch0(&(*pkts)->pool);
195         }
196 }
197
198 /* Populate 1 descriptor with data from 1 mbuf */
199 static inline void
200 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
201 {
202         uint64_t buf_dma_addr;
203         uint32_t pkt_len;
204
205         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
206         pkt_len = (*pkts)->data_len;
207
208         /* write data to descriptor */
209         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
210         txdp->read.cmd_type_len =
211                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
212         txdp->read.olinfo_status =
213                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
214         rte_prefetch0(&(*pkts)->pool);
215 }
216
217 /*
218  * Fill H/W descriptor ring with mbuf data.
219  * Copy mbuf pointers to the S/W ring.
220  */
221 static inline void
222 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
223                       uint16_t nb_pkts)
224 {
225         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
226         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
227         const int N_PER_LOOP = 4;
228         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
229         int mainpart, leftover;
230         int i, j;
231
232         /*
233          * Process most of the packets in chunks of N pkts.  Any
234          * leftover packets will get processed one at a time.
235          */
236         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
237         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
238         for (i = 0; i < mainpart; i += N_PER_LOOP) {
239                 /* Copy N mbuf pointers to the S/W ring */
240                 for (j = 0; j < N_PER_LOOP; ++j) {
241                         (txep + i + j)->mbuf = *(pkts + i + j);
242                 }
243                 tx4(txdp + i, pkts + i);
244         }
245
246         if (unlikely(leftover > 0)) {
247                 for (i = 0; i < leftover; ++i) {
248                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
249                         tx1(txdp + mainpart + i, pkts + mainpart + i);
250                 }
251         }
252 }
253
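/*
 * Core of the simple TX path: reclaim completed descriptors when running low,
 * fill the descriptor ring (handling wrap-around), set the RS bit at the
 * configured threshold and finally update the tail register.
 */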
254 static inline uint16_t
255 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
256              uint16_t nb_pkts)
257 {
258         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
259         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
260         uint16_t n = 0;
261
262         /*
263          * Begin scanning the H/W ring for done descriptors when the
264          * number of available descriptors drops below tx_free_thresh.  For
265          * each done descriptor, free the associated buffer.
266          */
267         if (txq->nb_tx_free < txq->tx_free_thresh)
268                 ixgbe_tx_free_bufs(txq);
269
270         /* Only use descriptors that are available */
271         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
272         if (unlikely(nb_pkts == 0))
273                 return 0;
274
275         /* Use exactly nb_pkts descriptors */
276         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
277
278         /*
279          * At this point, we know there are enough descriptors in the
280          * ring to transmit all the packets.  This assumes that each
281          * mbuf contains a single segment, and that no new offloads
282          * are expected, which would require a new context descriptor.
283          */
284
285         /*
286          * See if we're going to wrap-around. If so, handle the top
287          * of the descriptor ring first, then do the bottom.  If not,
288          * the processing looks just like the "bottom" part anyway...
289          */
290         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
291                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
292                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
293
294                 /*
295                  * We know that the last descriptor in the ring will need to
296                  * have its RS bit set because tx_rs_thresh has to be
297                  * a divisor of the ring size
298                  */
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302
303                 txq->tx_tail = 0;
304         }
305
306         /* Fill H/W descriptor ring with mbuf data */
307         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
308         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
309
310         /*
311          * Determine if RS bit should be set
312          * This is what we actually want:
313          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
314          * but instead of subtracting 1 and doing >=, we can just do
315          * greater than without subtracting.
316          */
317         if (txq->tx_tail > txq->tx_next_rs) {
318                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
319                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
320                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
321                                                 txq->tx_rs_thresh);
322                 if (txq->tx_next_rs >= txq->nb_tx_desc)
323                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
324         }
325
326         /*
327          * Check for wrap-around. This would only happen if we used
328          * up to the last descriptor in the ring, no more, no less.
329          */
330         if (txq->tx_tail >= txq->nb_tx_desc)
331                 txq->tx_tail = 0;
332
333         /* update tail pointer */
334         rte_wmb();
335         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
336
337         return nb_pkts;
338 }
339
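/*
 * Simple transmit path: packets are assumed to be single-segment and to need
 * no offloads. Bursts larger than TX_MAX_BURST are split into chunks of at
 * most TX_MAX_BURST packets.
 */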
340 uint16_t
341 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
342                        uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345
346         /* Transmit the whole burst directly if it fits within TX_MAX_BURST pkts */
347         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
348                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
349
350         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
351         nb_tx = 0;
352         while (nb_pkts) {
353                 uint16_t ret, n;
354
355                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
356                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
357                 nb_tx = (uint16_t)(nb_tx + ret);
358                 nb_pkts = (uint16_t)(nb_pkts - ret);
359                 if (ret < n)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
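/*
 * Build an advanced context descriptor for the requested offloads and record
 * the result in txq->ctx_cache[] so that following packets with the same
 * offload layout can reuse it.
 */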
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & PKT_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation is required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 case PKT_TX_SCTP_CKSUM:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
434                         tx_offload_mask.l2_len |= ~0;
435                         tx_offload_mask.l3_len |= ~0;
436                         break;
437                 default:
438                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
439                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
440                         break;
441                 }
442         }
443
444         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
445                 tx_offload_mask.outer_l2_len |= ~0;
446                 tx_offload_mask.outer_l3_len |= ~0;
447                 tx_offload_mask.l2_len |= ~0;
448                 seqnum_seed |= tx_offload.outer_l3_len
449                                << IXGBE_ADVTXD_OUTER_IPLEN;
450                 seqnum_seed |= tx_offload.l2_len
451                                << IXGBE_ADVTXD_TUNNEL_LEN;
452         }
453
454         txq->ctx_cache[ctx_idx].flags = ol_flags;
455         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
456                 tx_offload_mask.data[0] & tx_offload.data[0];
457         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
458                 tx_offload_mask.data[1] & tx_offload.data[1];
459         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
460
461         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
462         vlan_macip_lens = tx_offload.l3_len;
463         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
464                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
465                                     IXGBE_ADVTXD_MACLEN_SHIFT);
466         else
467                 vlan_macip_lens |= (tx_offload.l2_len <<
468                                     IXGBE_ADVTXD_MACLEN_SHIFT);
469         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
470         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
471         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
472         ctx_txd->seqnum_seed     = seqnum_seed;
473 }
474
475 /*
476  * Check which hardware context can be used. Use the existing match
477  * or create a new context descriptor.
478  */
479 static inline uint32_t
480 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
481                    union ixgbe_tx_offload tx_offload)
482 {
483         /* Check for a match with the currently used context */
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* Otherwise, check for a match with the other cached context */
494         txq->ctx_curr ^= 1;
495         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
496                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
497                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
498                      & tx_offload.data[0])) &&
499                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
500                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
501                      & tx_offload.data[1]))))
502                 return txq->ctx_curr;
503
504         /* Mismatch: the caller must build a new context descriptor */
505         return IXGBE_CTX_NUM;
506 }
507
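/* Translate mbuf checksum/TSO offload flags into the POPTS bits of the
 * data descriptor's olinfo_status field.
 */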
508 static inline uint32_t
509 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
510 {
511         uint32_t tmp = 0;
512
513         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
514                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
515         if (ol_flags & PKT_TX_IP_CKSUM)
516                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
517         if (ol_flags & PKT_TX_TCP_SEG)
518                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
519         return tmp;
520 }
521
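/* Translate mbuf offload flags (VLAN insertion, TSO, outer IP checksum,
 * MACsec) into the command bits of the data descriptor's cmd_type_len field.
 */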
522 static inline uint32_t
523 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
524 {
525         uint32_t cmdtype = 0;
526
527         if (ol_flags & PKT_TX_VLAN_PKT)
528                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
529         if (ol_flags & PKT_TX_TCP_SEG)
530                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
531         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
532                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
533         if (ol_flags & PKT_TX_MACSEC)
534                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
535         return cmdtype;
536 }
537
538 /* Default RS bit threshold values */
539 #ifndef DEFAULT_TX_RS_THRESH
540 #define DEFAULT_TX_RS_THRESH   32
541 #endif
542 #ifndef DEFAULT_TX_FREE_THRESH
543 #define DEFAULT_TX_FREE_THRESH 32
544 #endif
545
546 /* Reset transmit descriptors after they have been used */
547 static inline int
548 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
549 {
550         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
551         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
552         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
553         uint16_t nb_tx_desc = txq->nb_tx_desc;
554         uint16_t desc_to_clean_to;
555         uint16_t nb_tx_to_clean;
556         uint32_t status;
557
558         /* Determine the last descriptor needing to be cleaned */
559         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
560         if (desc_to_clean_to >= nb_tx_desc)
561                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
562
563         /* Check to make sure the last descriptor to clean is done */
564         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
565         status = txr[desc_to_clean_to].wb.status;
566         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
567                 PMD_TX_FREE_LOG(DEBUG,
568                                 "TX descriptor %4u is not done "
569                                 "(port=%d queue=%d)",
570                                 desc_to_clean_to,
571                                 txq->port_id, txq->queue_id);
572                 /* Failed to clean any descriptors, better luck next time */
573                 return -(1);
574         }
575
576         /* Figure out how many descriptors will be cleaned */
577         if (last_desc_cleaned > desc_to_clean_to)
578                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
579                                                         desc_to_clean_to);
580         else
581                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
582                                                 last_desc_cleaned);
583
584         PMD_TX_FREE_LOG(DEBUG,
585                         "Cleaning %4u TX descriptors: %4u to %4u "
586                         "(port=%d queue=%d)",
587                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
588                         txq->port_id, txq->queue_id);
589
590         /*
591          * The last descriptor to clean is done, so that means all the
592          * descriptors from the last descriptor that was cleaned
593          * up to the last descriptor with the RS bit set
594          * are done. Only reset the threshold descriptor.
595          */
596         txr[desc_to_clean_to].wb.status = 0;
597
598         /* Update the txq to reflect the last descriptor that was cleaned */
599         txq->last_desc_cleaned = desc_to_clean_to;
600         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
601
602         /* No Error */
603         return 0;
604 }
605
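/*
 * Full-featured transmit path: handles multi-segment packets and hardware
 * offloads (VLAN, checksum, TSO), inserting context descriptors whenever the
 * offload layout changes.
 */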
606 uint16_t
607 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
608                 uint16_t nb_pkts)
609 {
610         struct ixgbe_tx_queue *txq;
611         struct ixgbe_tx_entry *sw_ring;
612         struct ixgbe_tx_entry *txe, *txn;
613         volatile union ixgbe_adv_tx_desc *txr;
614         volatile union ixgbe_adv_tx_desc *txd, *txp;
615         struct rte_mbuf     *tx_pkt;
616         struct rte_mbuf     *m_seg;
617         uint64_t buf_dma_addr;
618         uint32_t olinfo_status;
619         uint32_t cmd_type_len;
620         uint32_t pkt_len;
621         uint16_t slen;
622         uint64_t ol_flags;
623         uint16_t tx_id;
624         uint16_t tx_last;
625         uint16_t nb_tx;
626         uint16_t nb_used;
627         uint64_t tx_ol_req;
628         uint32_t ctx = 0;
629         uint32_t new_ctx;
630         union ixgbe_tx_offload tx_offload;
631
632         tx_offload.data[0] = 0;
633         tx_offload.data[1] = 0;
634         txq = tx_queue;
635         sw_ring = txq->sw_ring;
636         txr     = txq->tx_ring;
637         tx_id   = txq->tx_tail;
638         txe = &sw_ring[tx_id];
639         txp = NULL;
640
641         /* Determine if the descriptor ring needs to be cleaned. */
642         if (txq->nb_tx_free < txq->tx_free_thresh)
643                 ixgbe_xmit_cleanup(txq);
644
645         rte_prefetch0(&txe->mbuf->pool);
646
647         /* TX loop */
648         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
649                 new_ctx = 0;
650                 tx_pkt = *tx_pkts++;
651                 pkt_len = tx_pkt->pkt_len;
652
653                 /*
654                  * Determine how many (if any) context descriptors
655                  * are needed for offload functionality.
656                  */
657                 ol_flags = tx_pkt->ol_flags;
658
659                 /* If hardware offload is required */
660                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
661                 if (tx_ol_req) {
662                         tx_offload.l2_len = tx_pkt->l2_len;
663                         tx_offload.l3_len = tx_pkt->l3_len;
664                         tx_offload.l4_len = tx_pkt->l4_len;
665                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
666                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
667                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
668                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
669
670                         /* Build a new context descriptor or reuse an existing one. */
671                         ctx = what_advctx_update(txq, tx_ol_req,
672                                 tx_offload);
673                         /* Only allocate a context descriptor if required */
674                         new_ctx = (ctx == IXGBE_CTX_NUM);
675                         ctx = txq->ctx_curr;
676                 }
677
678                 /*
679                  * Keep track of how many descriptors are used in this loop.
680                  * This will always be the number of segments + the number of
681                  * context descriptors required to transmit the packet.
682                  */
683                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
684
685                 if (txp != NULL &&
686                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
687                         /* set RS on the previous packet in the burst */
688                         txp->read.cmd_type_len |=
689                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
690
691                 /*
692                  * The number of descriptors that must be allocated for a
693                  * packet is the number of segments of that packet, plus 1
694                  * Context Descriptor for the hardware offload, if any.
695                  * Determine the last TX descriptor to allocate in the TX ring
696                  * for the packet, starting from the current position (tx_id)
697                  * in the ring.
698                  */
699                 tx_last = (uint16_t) (tx_id + nb_used - 1);
700
701                 /* Circular ring */
702                 if (tx_last >= txq->nb_tx_desc)
703                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
704
705                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
706                            " tx_first=%u tx_last=%u",
707                            (unsigned) txq->port_id,
708                            (unsigned) txq->queue_id,
709                            (unsigned) pkt_len,
710                            (unsigned) tx_id,
711                            (unsigned) tx_last);
712
713                 /*
714                  * Make sure there are enough TX descriptors available to
715                  * transmit the entire packet.
716                  * nb_used better be less than or equal to txq->tx_rs_thresh
717                  */
718                 if (nb_used > txq->nb_tx_free) {
719                         PMD_TX_FREE_LOG(DEBUG,
720                                         "Not enough free TX descriptors "
721                                         "nb_used=%4u nb_free=%4u "
722                                         "(port=%d queue=%d)",
723                                         nb_used, txq->nb_tx_free,
724                                         txq->port_id, txq->queue_id);
725
726                         if (ixgbe_xmit_cleanup(txq) != 0) {
727                                 /* Could not clean any descriptors */
728                                 if (nb_tx == 0)
729                                         return 0;
730                                 goto end_of_tx;
731                         }
732
733                         /* nb_used better be <= txq->tx_rs_thresh */
734                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
735                                 PMD_TX_FREE_LOG(DEBUG,
736                                         "The number of descriptors needed to "
737                                         "transmit the packet exceeds the "
738                                         "RS bit threshold. This will impact "
739                                         "performance. "
740                                         "nb_used=%4u nb_free=%4u "
741                                         "tx_rs_thresh=%4u. "
742                                         "(port=%d queue=%d)",
743                                         nb_used, txq->nb_tx_free,
744                                         txq->tx_rs_thresh,
745                                         txq->port_id, txq->queue_id);
746                                 /*
747                                  * Loop here until there are enough TX
748                                  * descriptors or until the ring cannot be
749                                  * cleaned.
750                                  */
751                                 while (nb_used > txq->nb_tx_free) {
752                                         if (ixgbe_xmit_cleanup(txq) != 0) {
753                                                 /*
754                                                  * Could not clean any
755                                                  * descriptors
756                                                  */
757                                                 if (nb_tx == 0)
758                                                         return 0;
759                                                 goto end_of_tx;
760                                         }
761                                 }
762                         }
763                 }
764
765                 /*
766                  * By now there are enough free TX descriptors to transmit
767                  * the packet.
768                  */
769
770                 /*
771                  * Set common flags of all TX Data Descriptors.
772                  *
773                  * The following bits must be set in all Data Descriptors:
774                  *   - IXGBE_ADVTXD_DTYP_DATA
775                  *   - IXGBE_ADVTXD_DCMD_DEXT
776                  *
777                  * The following bits must be set in the first Data Descriptor
778                  * and are ignored in the other ones:
779                  *   - IXGBE_ADVTXD_DCMD_IFCS
780                  *   - IXGBE_ADVTXD_MAC_1588
781                  *   - IXGBE_ADVTXD_DCMD_VLE
782                  *
783                  * The following bits must only be set in the last Data
784                  * Descriptor:
785                  *   - IXGBE_TXD_CMD_EOP
786                  *
787                  * The following bits can be set in any Data Descriptor, but
788                  * are only set in the last Data Descriptor:
789                  *   - IXGBE_TXD_CMD_RS
790                  */
791                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
792                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
793
794 #ifdef RTE_LIBRTE_IEEE1588
795                 if (ol_flags & PKT_TX_IEEE1588_TMST)
796                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
797 #endif
798
799                 olinfo_status = 0;
800                 if (tx_ol_req) {
801
802                         if (ol_flags & PKT_TX_TCP_SEG) {
803                                 /* when TSO is on, the paylen in the descriptor is
804                                  * not the packet len but the TCP payload len */
805                                 pkt_len -= (tx_offload.l2_len +
806                                         tx_offload.l3_len + tx_offload.l4_len);
807                         }
808
809                         /*
810                          * Setup the TX Advanced Context Descriptor if required
811                          */
812                         if (new_ctx) {
813                                 volatile struct ixgbe_adv_tx_context_desc *
814                                     ctx_txd;
815
816                                 ctx_txd = (volatile struct
817                                     ixgbe_adv_tx_context_desc *)
818                                     &txr[tx_id];
819
820                                 txn = &sw_ring[txe->next_id];
821                                 rte_prefetch0(&txn->mbuf->pool);
822
823                                 if (txe->mbuf != NULL) {
824                                         rte_pktmbuf_free_seg(txe->mbuf);
825                                         txe->mbuf = NULL;
826                                 }
827
828                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
829                                         tx_offload);
830
831                                 txe->last_id = tx_last;
832                                 tx_id = txe->next_id;
833                                 txe = txn;
834                         }
835
836                         /*
837                          * Set up the TX Advanced Data Descriptor.
838                          * This path is taken whether a new context descriptor
839                          * was built or an existing one is reused.
840                          */
841                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
842                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
843                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
844                 }
845
846                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
847
848                 m_seg = tx_pkt;
849                 do {
850                         txd = &txr[tx_id];
851                         txn = &sw_ring[txe->next_id];
852                         rte_prefetch0(&txn->mbuf->pool);
853
854                         if (txe->mbuf != NULL)
855                                 rte_pktmbuf_free_seg(txe->mbuf);
856                         txe->mbuf = m_seg;
857
858                         /*
859                          * Set up Transmit Data Descriptor.
860                          */
861                         slen = m_seg->data_len;
862                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
863                         txd->read.buffer_addr =
864                                 rte_cpu_to_le_64(buf_dma_addr);
865                         txd->read.cmd_type_len =
866                                 rte_cpu_to_le_32(cmd_type_len | slen);
867                         txd->read.olinfo_status =
868                                 rte_cpu_to_le_32(olinfo_status);
869                         txe->last_id = tx_last;
870                         tx_id = txe->next_id;
871                         txe = txn;
872                         m_seg = m_seg->next;
873                 } while (m_seg != NULL);
874
875                 /*
876                  * The last packet data descriptor needs End Of Packet (EOP)
877                  */
878                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
879                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
880                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
881
882                 /* Set RS bit only on threshold packets' last descriptor */
883                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
884                         PMD_TX_FREE_LOG(DEBUG,
885                                         "Setting RS bit on TXD id="
886                                         "%4u (port=%d queue=%d)",
887                                         tx_last, txq->port_id, txq->queue_id);
888
889                         cmd_type_len |= IXGBE_TXD_CMD_RS;
890
891                         /* Update txq RS bit counters */
892                         txq->nb_tx_used = 0;
893                         txp = NULL;
894                 } else
895                         txp = txd;
896
897                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
898         }
899
900 end_of_tx:
901         /* set RS on last packet in the burst */
902         if (txp != NULL)
903                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
904
905         rte_wmb();
906
907         /*
908          * Set the Transmit Descriptor Tail (TDT)
909          */
910         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
911                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
912                    (unsigned) tx_id, (unsigned) nb_tx);
913         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
914         txq->tx_tail = tx_id;
915
916         return nb_tx;
917 }
918
919 /*********************************************************************
920  *
921  *  TX prep functions
922  *
923  **********************************************************************/
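/*
 * Validate and fix up a burst of packets before ixgbe_xmit_pkts(): check the
 * segment count, reject unsupported offload flags and prepare the checksum
 * fields expected by the hardware.
 */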
924 uint16_t
925 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
926 {
927         int i, ret;
928         uint64_t ol_flags;
929         struct rte_mbuf *m;
930         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
931
932         for (i = 0; i < nb_pkts; i++) {
933                 m = tx_pkts[i];
934                 ol_flags = m->ol_flags;
935
936                 /**
937                  * Check if the packet meets the requirements on the number of segments.
938                  *
939                  * NOTE: for ixgbe the limit is always (40 - WTHRESH), for both
940                  *       TSO and non-TSO packets.
941                  */
942
943                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
944                         rte_errno = EINVAL;
945                         return i;
946                 }
947
948                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
949                         rte_errno = ENOTSUP;
950                         return i;
951                 }
952
953 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
954                 ret = rte_validate_tx_offload(m);
955                 if (ret != 0) {
956                         rte_errno = -ret;
957                         return i;
958                 }
959 #endif
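                /* Prepare the packet's checksum fields (pseudo-header checksum)
                 * so the hardware can complete L4 checksum offload and TSO.
                 */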
960                 ret = rte_net_intel_cksum_prepare(m);
961                 if (ret != 0) {
962                         rte_errno = -ret;
963                         return i;
964                 }
965         }
966
967         return i;
968 }
969
970 /*********************************************************************
971  *
972  *  RX functions
973  *
974  **********************************************************************/
975
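/*
 * Packet type values decoded from the RX descriptor's packet type field;
 * they are used as indexes into the ptype lookup tables below.
 */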
976 #define IXGBE_PACKET_TYPE_ETHER                         0X00
977 #define IXGBE_PACKET_TYPE_IPV4                          0X01
978 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
979 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
980 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
981 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
982 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
983 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
984 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
985 #define IXGBE_PACKET_TYPE_IPV6                          0X04
986 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
987 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
988 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
989 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
990 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
991 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
992 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
993 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
994 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
997 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
998 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1001 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1002 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1003 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1004 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1005 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1006 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1007 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1008 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1009
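/* Packet type values for tunneled (NVGRE/VXLAN) packets, used with the
 * ptype_table_tn lookup table.
 */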
1010 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1027 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1028 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1029 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1030 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1031 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1032 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1033
1034 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1051 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1052 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1053 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1054 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1055 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1056 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1057
1058 #define IXGBE_PACKET_TYPE_MAX               0X80
1059 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1060 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1061
1062 /* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1063 static inline uint32_t
1064 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1065 {
1066         /**
1067          * Use two different tables for normal packets and tunnel packets
1068          * to save space.
1069          */
1070         static const uint32_t
1071                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1072                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1073                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4,
1075                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1077                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1085                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1089                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6,
1091                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1093                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1095                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT,
1099                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1101                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1103                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1111                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1114                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1126                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1135                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1145                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1146                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1147                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1148                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1151                         RTE_PTYPE_L2_ETHER |
1152                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1154         };
1155
1156         static const uint32_t
1157                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1158                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1176                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1178                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1179                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1180                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1181                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1182                         RTE_PTYPE_INNER_L4_TCP,
1183                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1184                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1185                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1186                         RTE_PTYPE_INNER_L4_TCP,
1187                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1193                         RTE_PTYPE_INNER_L4_TCP,
1194                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1195                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1196                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1197                         RTE_PTYPE_INNER_L3_IPV4,
1198                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1199                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1201                         RTE_PTYPE_INNER_L4_UDP,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1205                         RTE_PTYPE_INNER_L4_UDP,
1206                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1207                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1209                         RTE_PTYPE_INNER_L4_SCTP,
1210                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1211                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1216                         RTE_PTYPE_INNER_L4_UDP,
1217                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1220                         RTE_PTYPE_INNER_L4_SCTP,
1221                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1224                         RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1228                         RTE_PTYPE_INNER_L4_SCTP,
1229                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1231                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1232                         RTE_PTYPE_INNER_L4_SCTP,
1233                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1236                         RTE_PTYPE_INNER_L4_TCP,
1237                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1240                         RTE_PTYPE_INNER_L4_UDP,
1241
1242                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1244                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV4,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1250                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1251                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1252                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV6,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4,
1269                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1270                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1271                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1272                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1273                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1274                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1277                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1278                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                         RTE_PTYPE_INNER_L3_IPV4,
1281                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1282                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1285                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1286                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1287                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1288                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1289                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1290                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1293                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1294                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1297                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1298                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1301                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1302                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                         RTE_PTYPE_INNER_L3_IPV4,
1305                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1306                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1309                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1310                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1313                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1314                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1318                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1321                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1322                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1325                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1326                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1329                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1330                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1333         };
1334
1335         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1336                 return RTE_PTYPE_UNKNOWN;
1337
1338         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1339
1340         /* For tunnel packet */
1341         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1342                 /* Remove the tunnel bit to save space. */
1343                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1344                 return ptype_table_tn[pkt_info];
1345         }
1346
1347         /**
1348          * For x550, if it's not a tunnel packet,
1349          * the tunnel type bit should be set to 0.
1350          * Reuse the 82599 mask.
1351          */
1352         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1353
1354         return ptype_table[pkt_info];
1355 }
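
/*
 * Illustrative sketch (editor-added, not part of the upstream driver): one
 * way an application could consume the packet_type value produced by
 * ixgbe_rxd_pkt_info_to_pkt_type() above. RTE_ETH_IS_TUNNEL_PKT() and the
 * RTE_PTYPE_* masks are the generic packet-type helpers from the mbuf/ethdev
 * headers already included here; the function name is hypothetical.
 */
static inline int
example_pkt_is_gre_tunnel(const struct rte_mbuf *m)
{
        /* Tunnel ptypes carry outer info plus RTE_PTYPE_INNER_* fields. */
        if (!RTE_ETH_IS_TUNNEL_PKT(m->packet_type))
                return 0;
        return (m->packet_type & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRE;
}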
1356
1357 static inline uint64_t
1358 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1359 {
1360         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1361                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1362                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1363                 PKT_RX_RSS_HASH, 0, 0, 0,
1364                 0, 0, 0,  PKT_RX_FDIR,
1365         };
1366 #ifdef RTE_LIBRTE_IEEE1588
1367         static uint64_t ip_pkt_etqf_map[8] = {
1368                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1369                 0, 0, 0, 0,
1370         };
1371
1372         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1373                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1374                                 ip_rss_types_map[pkt_info & 0XF];
1375         else
1376                 return ip_rss_types_map[pkt_info & 0XF];
1377 #else
1378         return ip_rss_types_map[pkt_info & 0XF];
1379 #endif
1380 }
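
/*
 * Illustrative sketch (editor-added): consuming the RSS/FDIR flags produced
 * by ixgbe_rxd_pkt_info_to_pkt_flags(). The mbuf hash union members are only
 * meaningful when the matching offload flag is set; the function name is
 * hypothetical.
 */
static inline uint32_t
example_rx_flow_hash(const struct rte_mbuf *m)
{
        if (m->ol_flags & PKT_RX_RSS_HASH)
                return m->hash.rss;             /* 32-bit RSS hash */
        if (m->ol_flags & PKT_RX_FDIR)
                return m->hash.fdir.hash;       /* flow director match hash */
        return 0;
}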
1381
1382 static inline uint64_t
1383 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1384 {
1385         uint64_t pkt_flags;
1386
1387         /*
1388          * Check only whether a VLAN is present.
1389          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1390          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1391          */
1392         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1393
1394 #ifdef RTE_LIBRTE_IEEE1588
1395         if (rx_status & IXGBE_RXD_STAT_TMST)
1396                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1397 #endif
1398         return pkt_flags;
1399 }
1400
1401 static inline uint64_t
1402 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1403 {
1404         uint64_t pkt_flags;
1405
1406         /*
1407          * Bit 31: IPE, IPv4 checksum error
1408          * Bit 30: L4I, L4 integrity error
1409          */
1410         static uint64_t error_to_pkt_flags_map[4] = {
1411                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1412                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1413                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1414                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1415         };
1416         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1417                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1418
1419         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1420             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1421                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1422         }
1423
1424         return pkt_flags;
1425 }
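
/*
 * Illustrative sketch (editor-added): acting on the checksum status flags
 * assembled by rx_desc_error_to_pkt_flags(). PKT_RX_*_CKSUM_* and
 * PKT_RX_EIP_CKSUM_BAD are the standard mbuf offload flags; the function
 * name is hypothetical.
 */
static inline int
example_rx_csum_ok(const struct rte_mbuf *m)
{
        /* Neither the inner IP/L4 nor the outer IP checksum was flagged bad. */
        return (m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
                               PKT_RX_EIP_CKSUM_BAD)) == 0;
}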
1426
1427 /*
1428  * LOOK_AHEAD defines how many desc statuses to check beyond the
1429  * current descriptor.
1430  * It must be a pound define for optimal performance.
1431  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1432  * function only works with LOOK_AHEAD=8.
1433  */
1434 #define LOOK_AHEAD 8
1435 #if (LOOK_AHEAD != 8)
1436 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1437 #endif
1438 static inline int
1439 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1440 {
1441         volatile union ixgbe_adv_rx_desc *rxdp;
1442         struct ixgbe_rx_entry *rxep;
1443         struct rte_mbuf *mb;
1444         uint16_t pkt_len;
1445         uint64_t pkt_flags;
1446         int nb_dd;
1447         uint32_t s[LOOK_AHEAD];
1448         uint32_t pkt_info[LOOK_AHEAD];
1449         int i, j, nb_rx = 0;
1450         uint32_t status;
1451         uint64_t vlan_flags = rxq->vlan_flags;
1452
1453         /* get references to current descriptor and S/W ring entry */
1454         rxdp = &rxq->rx_ring[rxq->rx_tail];
1455         rxep = &rxq->sw_ring[rxq->rx_tail];
1456
1457         status = rxdp->wb.upper.status_error;
1458         /* check to make sure there is at least 1 packet to receive */
1459         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1460                 return 0;
1461
1462         /*
1463          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1464          * reference packets that are ready to be received.
1465          */
1466         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1467              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1468                 /* Read desc statuses backwards to avoid race condition */
1469                 for (j = 0; j < LOOK_AHEAD; j++)
1470                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1471
1472                 rte_smp_rmb();
1473
1474                 /* Compute how many status bits were set */
1475                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1476                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1477                         ;
1478
1479                 for (j = 0; j < nb_dd; j++)
1480                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1481                                                        lo_dword.data);
1482
1483                 nb_rx += nb_dd;
1484
1485                 /* Translate descriptor info to mbuf format */
1486                 for (j = 0; j < nb_dd; ++j) {
1487                         mb = rxep[j].mbuf;
1488                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1489                                   rxq->crc_len;
1490                         mb->data_len = pkt_len;
1491                         mb->pkt_len = pkt_len;
1492                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1493
1494                         /* convert descriptor fields to rte mbuf flags */
1495                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1496                                 vlan_flags);
1497                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1498                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1499                                         ((uint16_t)pkt_info[j]);
1500                         mb->ol_flags = pkt_flags;
1501                         mb->packet_type =
1502                                 ixgbe_rxd_pkt_info_to_pkt_type
1503                                         (pkt_info[j], rxq->pkt_type_mask);
1504
1505                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1506                                 mb->hash.rss = rte_le_to_cpu_32(
1507                                     rxdp[j].wb.lower.hi_dword.rss);
1508                         else if (pkt_flags & PKT_RX_FDIR) {
1509                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1510                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1511                                     IXGBE_ATR_HASH_MASK;
1512                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1513                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1514                         }
1515                 }
1516
1517                 /* Move mbuf pointers from the S/W ring to the stage */
1518                 for (j = 0; j < LOOK_AHEAD; ++j) {
1519                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1520                 }
1521
1522                 /* stop if all requested packets could not be received */
1523                 if (nb_dd != LOOK_AHEAD)
1524                         break;
1525         }
1526
1527         /* clear software ring entries so we can cleanup correctly */
1528         for (i = 0; i < nb_rx; ++i) {
1529                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1530         }
1531
1532
1533         return nb_rx;
1534 }
1535
1536 static inline int
1537 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1538 {
1539         volatile union ixgbe_adv_rx_desc *rxdp;
1540         struct ixgbe_rx_entry *rxep;
1541         struct rte_mbuf *mb;
1542         uint16_t alloc_idx;
1543         __le64 dma_addr;
1544         int diag, i;
1545
1546         /* allocate buffers in bulk directly into the S/W ring */
1547         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1548         rxep = &rxq->sw_ring[alloc_idx];
1549         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1550                                     rxq->rx_free_thresh);
1551         if (unlikely(diag != 0))
1552                 return -ENOMEM;
1553
1554         rxdp = &rxq->rx_ring[alloc_idx];
1555         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1556                 /* populate the static rte mbuf fields */
1557                 mb = rxep[i].mbuf;
1558                 if (reset_mbuf) {
1559                         mb->next = NULL;
1560                         mb->nb_segs = 1;
1561                         mb->port = rxq->port_id;
1562                 }
1563
1564                 rte_mbuf_refcnt_set(mb, 1);
1565                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1566
1567                 /* populate the descriptors */
1568                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1569                 rxdp[i].read.hdr_addr = 0;
1570                 rxdp[i].read.pkt_addr = dma_addr;
1571         }
1572
1573         /* update state of internal queue structure */
1574         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1575         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1576                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1577
1578         /* no errors */
1579         return 0;
1580 }
1581
1582 static inline uint16_t
1583 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1584                          uint16_t nb_pkts)
1585 {
1586         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1587         int i;
1588
1589         /* how many packets are ready to return? */
1590         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1591
1592         /* copy mbuf pointers to the application's packet list */
1593         for (i = 0; i < nb_pkts; ++i)
1594                 rx_pkts[i] = stage[i];
1595
1596         /* update internal queue state */
1597         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1598         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1599
1600         return nb_pkts;
1601 }
1602
1603 static inline uint16_t
1604 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1605              uint16_t nb_pkts)
1606 {
1607         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1608         uint16_t nb_rx = 0;
1609
1610         /* Any previously recv'd pkts will be returned from the Rx stage */
1611         if (rxq->rx_nb_avail)
1612                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1613
1614         /* Scan the H/W ring for packets to receive */
1615         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1616
1617         /* update internal queue state */
1618         rxq->rx_next_avail = 0;
1619         rxq->rx_nb_avail = nb_rx;
1620         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1621
1622         /* if required, allocate new buffers to replenish descriptors */
1623         if (rxq->rx_tail > rxq->rx_free_trigger) {
1624                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1625
1626                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1627                         int i, j;
1628
1629                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1630                                    "queue_id=%u", (unsigned) rxq->port_id,
1631                                    (unsigned) rxq->queue_id);
1632
1633                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1634                                 rxq->rx_free_thresh;
1635
1636                         /*
1637                          * Need to rewind any previous receives if we cannot
1638                          * allocate new buffers to replenish the old ones.
1639                          */
1640                         rxq->rx_nb_avail = 0;
1641                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1642                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1643                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1644
1645                         return 0;
1646                 }
1647
1648                 /* update tail pointer */
1649                 rte_wmb();
1650                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1651                                             cur_free_trigger);
1652         }
1653
1654         if (rxq->rx_tail >= rxq->nb_rx_desc)
1655                 rxq->rx_tail = 0;
1656
1657         /* received any packets this loop? */
1658         if (rxq->rx_nb_avail)
1659                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1660
1661         return 0;
1662 }
1663
1664 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1665 uint16_t
1666 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1667                            uint16_t nb_pkts)
1668 {
1669         uint16_t nb_rx;
1670
1671         if (unlikely(nb_pkts == 0))
1672                 return 0;
1673
1674         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1675                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1676
1677         /* request is relatively large, chunk it up */
1678         nb_rx = 0;
1679         while (nb_pkts) {
1680                 uint16_t ret, n;
1681
1682                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1683                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1684                 nb_rx = (uint16_t)(nb_rx + ret);
1685                 nb_pkts = (uint16_t)(nb_pkts - ret);
1686                 if (ret < n)
1687                         break;
1688         }
1689
1690         return nb_rx;
1691 }
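
/*
 * Illustrative sketch (editor-added): ixgbe_recv_pkts_bulk_alloc() is not
 * called directly by applications; it is installed as the device's
 * rx_pkt_burst handler and reached through rte_eth_rx_burst(). A minimal
 * polling loop, with hypothetical port/queue values and no real packet
 * processing, could look like this:
 */
static inline void
example_rx_poll(uint8_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[32];
        uint16_t i, nb;

        /* Bursts larger than RTE_PMD_IXGBE_RX_MAX_BURST are chunked above. */
        nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);      /* application work goes here */
}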
1692
1693 uint16_t
1694 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1695                 uint16_t nb_pkts)
1696 {
1697         struct ixgbe_rx_queue *rxq;
1698         volatile union ixgbe_adv_rx_desc *rx_ring;
1699         volatile union ixgbe_adv_rx_desc *rxdp;
1700         struct ixgbe_rx_entry *sw_ring;
1701         struct ixgbe_rx_entry *rxe;
1702         struct rte_mbuf *rxm;
1703         struct rte_mbuf *nmb;
1704         union ixgbe_adv_rx_desc rxd;
1705         uint64_t dma_addr;
1706         uint32_t staterr;
1707         uint32_t pkt_info;
1708         uint16_t pkt_len;
1709         uint16_t rx_id;
1710         uint16_t nb_rx;
1711         uint16_t nb_hold;
1712         uint64_t pkt_flags;
1713         uint64_t vlan_flags;
1714
1715         nb_rx = 0;
1716         nb_hold = 0;
1717         rxq = rx_queue;
1718         rx_id = rxq->rx_tail;
1719         rx_ring = rxq->rx_ring;
1720         sw_ring = rxq->sw_ring;
1721         vlan_flags = rxq->vlan_flags;
1722         while (nb_rx < nb_pkts) {
1723                 /*
1724                  * The order of operations here is important as the DD status
1725                  * bit must not be read after any other descriptor fields.
1726                  * rx_ring and rxdp are pointing to volatile data so the order
1727                  * of accesses cannot be reordered by the compiler. If they were
1728                  * not volatile, they could be reordered which could lead to
1729                  * using invalid descriptor fields when read from rxd.
1730                  */
1731                 rxdp = &rx_ring[rx_id];
1732                 staterr = rxdp->wb.upper.status_error;
1733                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1734                         break;
1735                 rxd = *rxdp;
1736
1737                 /*
1738                  * End of packet.
1739                  *
1740                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1741                  * is likely to be invalid and to be dropped by the various
1742                  * validation checks performed by the network stack.
1743                  *
1744                  * Allocate a new mbuf to replenish the RX ring descriptor.
1745                  * If the allocation fails:
1746                  *    - arrange for that RX descriptor to be the first one
1747                  *      being parsed the next time the receive function is
1748                  *      invoked [on the same queue].
1749                  *
1750                  *    - Stop parsing the RX ring and return immediately.
1751                  *
1752                  * This policy does not drop the packet received in the RX
1753                  * descriptor for which the allocation of a new mbuf failed.
1754                  * Thus, it allows that packet to be later retrieved if
1755                  * mbufs have been freed in the meantime.
1756                  * As a side effect, holding RX descriptors instead of
1757                  * systematically giving them back to the NIC may lead to
1758                  * RX ring exhaustion situations.
1759                  * However, the NIC can gracefully prevent such situations
1760                  * from happening by sending specific "back-pressure" flow control
1761                  * frames to its peer(s).
1762                  */
1763                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1764                            "ext_err_stat=0x%08x pkt_len=%u",
1765                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1766                            (unsigned) rx_id, (unsigned) staterr,
1767                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1768
1769                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1770                 if (nmb == NULL) {
1771                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1772                                    "queue_id=%u", (unsigned) rxq->port_id,
1773                                    (unsigned) rxq->queue_id);
1774                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1775                         break;
1776                 }
1777
1778                 nb_hold++;
1779                 rxe = &sw_ring[rx_id];
1780                 rx_id++;
1781                 if (rx_id == rxq->nb_rx_desc)
1782                         rx_id = 0;
1783
1784                 /* Prefetch next mbuf while processing current one. */
1785                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1786
1787                 /*
1788                  * When next RX descriptor is on a cache-line boundary,
1789                  * prefetch the next 4 RX descriptors and the next 8 pointers
1790                  * to mbufs.
1791                  */
1792                 if ((rx_id & 0x3) == 0) {
1793                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1794                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1795                 }
1796
1797                 rxm = rxe->mbuf;
1798                 rxe->mbuf = nmb;
1799                 dma_addr =
1800                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1801                 rxdp->read.hdr_addr = 0;
1802                 rxdp->read.pkt_addr = dma_addr;
1803
1804                 /*
1805                  * Initialize the returned mbuf.
1806                  * 1) setup generic mbuf fields:
1807                  *    - number of segments,
1808                  *    - next segment,
1809                  *    - packet length,
1810                  *    - RX port identifier.
1811                  * 2) integrate hardware offload data, if any:
1812                  *    - RSS flag & hash,
1813                  *    - IP checksum flag,
1814                  *    - VLAN TCI, if any,
1815                  *    - error flags.
1816                  */
1817                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1818                                       rxq->crc_len);
1819                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1820                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1821                 rxm->nb_segs = 1;
1822                 rxm->next = NULL;
1823                 rxm->pkt_len = pkt_len;
1824                 rxm->data_len = pkt_len;
1825                 rxm->port = rxq->port_id;
1826
1827                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1828                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1829                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1830
1831                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1832                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1833                 pkt_flags = pkt_flags |
1834                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1835                 rxm->ol_flags = pkt_flags;
1836                 rxm->packet_type =
1837                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1838                                                        rxq->pkt_type_mask);
1839
1840                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1841                         rxm->hash.rss = rte_le_to_cpu_32(
1842                                                 rxd.wb.lower.hi_dword.rss);
1843                 else if (pkt_flags & PKT_RX_FDIR) {
1844                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1845                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1846                                         IXGBE_ATR_HASH_MASK;
1847                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1848                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1849                 }
1850                 /*
1851                  * Store the mbuf address into the next entry of the array
1852                  * of returned packets.
1853                  */
1854                 rx_pkts[nb_rx++] = rxm;
1855         }
1856         rxq->rx_tail = rx_id;
1857
1858         /*
1859          * If the number of free RX descriptors is greater than the RX free
1860          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1861          * register.
1862          * Update the RDT with the value of the last processed RX descriptor
1863          * minus 1, to guarantee that the RDT register is never equal to the
1864          * RDH register, which creates a "full" ring situation from the
1865          * hardware point of view...
1866          */
1867         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1868         if (nb_hold > rxq->rx_free_thresh) {
1869                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1870                            "nb_hold=%u nb_rx=%u",
1871                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1872                            (unsigned) rx_id, (unsigned) nb_hold,
1873                            (unsigned) nb_rx);
1874                 rx_id = (uint16_t) ((rx_id == 0) ?
1875                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1876                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1877                 nb_hold = 0;
1878         }
1879         rxq->nb_rx_hold = nb_hold;
1880         return nb_rx;
1881 }
1882
1883 /**
1884  * Detect an RSC descriptor.
1885  */
1886 static inline uint32_t
1887 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1888 {
1889         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1890                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1891 }
1892
1893 /**
1894  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1895  *
1896  * Fill the following info in the HEAD buffer of the Rx cluster:
1897  *    - RX port identifier
1898  *    - hardware offload data, if any:
1899  *      - RSS flag & hash
1900  *      - IP checksum flag
1901  *      - VLAN TCI, if any
1902  *      - error flags
1903  * @head HEAD of the packet cluster
1904  * @desc HW descriptor to get data from
1905  * @rxq Pointer to the Rx queue
1906  */
1907 static inline void
1908 ixgbe_fill_cluster_head_buf(
1909         struct rte_mbuf *head,
1910         union ixgbe_adv_rx_desc *desc,
1911         struct ixgbe_rx_queue *rxq,
1912         uint32_t staterr)
1913 {
1914         uint32_t pkt_info;
1915         uint64_t pkt_flags;
1916
1917         head->port = rxq->port_id;
1918
1919         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1920          * set in the pkt_flags field.
1921          */
1922         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1923         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1924         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1925         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1926         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1927         head->ol_flags = pkt_flags;
1928         head->packet_type =
1929                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1930
1931         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1932                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1933         else if (pkt_flags & PKT_RX_FDIR) {
1934                 head->hash.fdir.hash =
1935                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1936                                                           & IXGBE_ATR_HASH_MASK;
1937                 head->hash.fdir.id =
1938                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1939         }
1940 }
1941
1942 /**
1943  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1944  *
1945  * @rx_queue Rx queue handle
1946  * @rx_pkts table of received packets
1947  * @nb_pkts size of rx_pkts table
1948  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1949  *
1950  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1951  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1952  *
1953  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1954  * 1) When non-EOP RSC completion arrives:
1955  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1956  *       segment's data length.
1957  *    b) Set the "next" pointer of the current segment to point to the segment
1958  *       at the NEXTP index.
1959  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1960  *       in the sw_rsc_ring.
1961  * 2) When EOP arrives we just update the cluster's total length and offload
1962  *    flags and deliver the cluster up to the upper layers. In our case - put it
1963  *    in the rx_pkts table.
1964  *
1965  * Returns the number of received packets/clusters (according to the "bulk
1966  * receive" interface).
1967  */
1968 static inline uint16_t
1969 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1970                     bool bulk_alloc)
1971 {
1972         struct ixgbe_rx_queue *rxq = rx_queue;
1973         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1974         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1975         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1976         uint16_t rx_id = rxq->rx_tail;
1977         uint16_t nb_rx = 0;
1978         uint16_t nb_hold = rxq->nb_rx_hold;
1979         uint16_t prev_id = rxq->rx_tail;
1980
1981         while (nb_rx < nb_pkts) {
1982                 bool eop;
1983                 struct ixgbe_rx_entry *rxe;
1984                 struct ixgbe_scattered_rx_entry *sc_entry;
1985                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1986                 struct ixgbe_rx_entry *next_rxe = NULL;
1987                 struct rte_mbuf *first_seg;
1988                 struct rte_mbuf *rxm;
1989                 struct rte_mbuf *nmb;
1990                 union ixgbe_adv_rx_desc rxd;
1991                 uint16_t data_len;
1992                 uint16_t next_id;
1993                 volatile union ixgbe_adv_rx_desc *rxdp;
1994                 uint32_t staterr;
1995
1996 next_desc:
1997                 /*
1998                  * The code in this whole file uses the volatile pointer to
1999                  * ensure the read ordering of the status and the rest of the
2000                  * descriptor fields (on the compiler level only!!!). This is so
2001                  * UGLY - why not to just use the compiler barrier instead? DPDK
2002                  * even has the rte_compiler_barrier() for that.
2003                  *
2004                  * But most importantly this is just wrong because this doesn't
2005                  * ensure memory ordering in a general case at all. For
2006                  * instance, DPDK is supposed to work on Power CPUs where
2007                  * compiler barrier may just not be enough!
2008                  *
2009                  * I tried to write only this function properly to have a
2010                  * starting point (as a part of an LRO/RSC series) but the
2011                  * compiler cursed at me when I tried to cast away the
2012                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2013                  * keeping it the way it is for now.
2014                  *
2015                  * The code in this file is broken in so many other places and
2016                  * will just not work on a big endian CPU anyway therefore the
2017                  * lines below will have to be revisited together with the rest
2018                  * of the ixgbe PMD.
2019                  *
2020                  * TODO:
2021                  *    - Get rid of "volatile" crap and let the compiler do its
2022                  *      job.
2023                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2024                  *      memory ordering below.
2025                  */
2026                 rxdp = &rx_ring[rx_id];
2027                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2028
2029                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2030                         break;
2031
2032                 rxd = *rxdp;
2033
2034                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2035                                   "staterr=0x%x data_len=%u",
2036                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2037                            rte_le_to_cpu_16(rxd.wb.upper.length));
2038
2039                 if (!bulk_alloc) {
2040                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2041                         if (nmb == NULL) {
2042                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2043                                                   "port_id=%u queue_id=%u",
2044                                            rxq->port_id, rxq->queue_id);
2045
2046                                 rte_eth_devices[rxq->port_id].data->
2047                                                         rx_mbuf_alloc_failed++;
2048                                 break;
2049                         }
2050                 } else if (nb_hold > rxq->rx_free_thresh) {
2051                         uint16_t next_rdt = rxq->rx_free_trigger;
2052
2053                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2054                                 rte_wmb();
2055                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2056                                                             next_rdt);
2057                                 nb_hold -= rxq->rx_free_thresh;
2058                         } else {
2059                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2060                                                   "port_id=%u queue_id=%u",
2061                                            rxq->port_id, rxq->queue_id);
2062
2063                                 rte_eth_devices[rxq->port_id].data->
2064                                                         rx_mbuf_alloc_failed++;
2065                                 break;
2066                         }
2067                 }
2068
2069                 nb_hold++;
2070                 rxe = &sw_ring[rx_id];
2071                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2072
2073                 next_id = rx_id + 1;
2074                 if (next_id == rxq->nb_rx_desc)
2075                         next_id = 0;
2076
2077                 /* Prefetch next mbuf while processing current one. */
2078                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2079
2080                 /*
2081                  * When next RX descriptor is on a cache-line boundary,
2082                  * prefetch the next 4 RX descriptors and the next 4 pointers
2083                  * to mbufs.
2084                  */
2085                 if ((next_id & 0x3) == 0) {
2086                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2087                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2088                 }
2089
2090                 rxm = rxe->mbuf;
2091
2092                 if (!bulk_alloc) {
2093                         __le64 dma =
2094                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2095                         /*
2096                          * Update RX descriptor with the physical address of the
2097                          * new data buffer of the new allocated mbuf.
2098                          */
2099                         rxe->mbuf = nmb;
2100
2101                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2102                         rxdp->read.hdr_addr = 0;
2103                         rxdp->read.pkt_addr = dma;
2104                 } else
2105                         rxe->mbuf = NULL;
2106
2107                 /*
2108                  * Set data length & data buffer address of mbuf.
2109                  */
2110                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2111                 rxm->data_len = data_len;
2112
2113                 if (!eop) {
2114                         uint16_t nextp_id;
2115                         /*
2116                          * Get next descriptor index:
2117                          *  - For RSC it's in the NEXTP field.
2118                          *  - For a scattered packet - it's just a following
2119                          *    descriptor.
2120                          */
2121                         if (ixgbe_rsc_count(&rxd))
2122                                 nextp_id =
2123                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2124                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2125                         else
2126                                 nextp_id = next_id;
2127
2128                         next_sc_entry = &sw_sc_ring[nextp_id];
2129                         next_rxe = &sw_ring[nextp_id];
2130                         rte_ixgbe_prefetch(next_rxe);
2131                 }
2132
2133                 sc_entry = &sw_sc_ring[rx_id];
2134                 first_seg = sc_entry->fbuf;
2135                 sc_entry->fbuf = NULL;
2136
2137                 /*
2138                  * If this is the first buffer of the received packet,
2139                  * set the pointer to the first mbuf of the packet and
2140                  * initialize its context.
2141                  * Otherwise, update the total length and the number of segments
2142                  * of the current scattered packet, and update the pointer to
2143                  * the last mbuf of the current packet.
2144                  */
2145                 if (first_seg == NULL) {
2146                         first_seg = rxm;
2147                         first_seg->pkt_len = data_len;
2148                         first_seg->nb_segs = 1;
2149                 } else {
2150                         first_seg->pkt_len += data_len;
2151                         first_seg->nb_segs++;
2152                 }
2153
2154                 prev_id = rx_id;
2155                 rx_id = next_id;
2156
2157                 /*
2158                  * If this is not the last buffer of the received packet, update
2159                  * the pointer to the first mbuf at the NEXTP entry in the
2160                  * sw_sc_ring and continue to parse the RX ring.
2161                  */
2162                 if (!eop && next_rxe) {
2163                         rxm->next = next_rxe->mbuf;
2164                         next_sc_entry->fbuf = first_seg;
2165                         goto next_desc;
2166                 }
2167
2168                 /*
2169                  * This is the last buffer of the received packet - return
2170                  * the current cluster to the user.
2171                  */
2172                 rxm->next = NULL;
2173
2174                 /* Initialize the first mbuf of the returned packet */
2175                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2176
2177                 /*
2178                  * Deal with the case when HW CRC strip is disabled.
2179                  * That can't happen when LRO is enabled, but still could
2180                  * happen for scattered RX mode.
2181                  */
2182                 first_seg->pkt_len -= rxq->crc_len;
2183                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2184                         struct rte_mbuf *lp;
2185
2186                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2187                                 ;
2188
2189                         first_seg->nb_segs--;
2190                         lp->data_len -= rxq->crc_len - rxm->data_len;
2191                         lp->next = NULL;
2192                         rte_pktmbuf_free_seg(rxm);
2193                 } else
2194                         rxm->data_len -= rxq->crc_len;
2195
2196                 /* Prefetch data of first segment, if configured to do so. */
2197                 rte_packet_prefetch((char *)first_seg->buf_addr +
2198                         first_seg->data_off);
2199
2200                 /*
2201                  * Store the mbuf address into the next entry of the array
2202                  * of returned packets.
2203                  */
2204                 rx_pkts[nb_rx++] = first_seg;
2205         }
2206
2207         /*
2208          * Record index of the next RX descriptor to probe.
2209          */
2210         rxq->rx_tail = rx_id;
2211
2212         /*
2213          * If the number of free RX descriptors is greater than the RX free
2214          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2215          * register.
2216          * Update the RDT with the value of the last processed RX descriptor
2217          * minus 1, to guarantee that the RDT register is never equal to the
2218          * RDH register, which creates a "full" ring situation from the
2219          * hardware point of view...
2220          */
2221         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2222                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2223                            "nb_hold=%u nb_rx=%u",
2224                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2225
2226                 rte_wmb();
2227                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2228                 nb_hold = 0;
2229         }
2230
2231         rxq->nb_rx_hold = nb_hold;
2232         return nb_rx;
2233 }
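
/*
 * Illustrative sketch (editor-added): the RSC/LRO path above returns each
 * aggregated cluster as a chained mbuf whose head carries pkt_len, nb_segs
 * and the offload data filled in by ixgbe_fill_cluster_head_buf(). A consumer
 * walks the chain segment by segment; the function name is hypothetical.
 */
static inline uint32_t
example_cluster_bytes(const struct rte_mbuf *head)
{
        const struct rte_mbuf *seg;
        uint32_t bytes = 0;

        /* The per-segment data lengths add up to head->pkt_len. */
        for (seg = head; seg != NULL; seg = seg->next)
                bytes += seg->data_len;
        return bytes;
}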
2234
2235 uint16_t
2236 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2237                                  uint16_t nb_pkts)
2238 {
2239         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2240 }
2241
2242 uint16_t
2243 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2244                                uint16_t nb_pkts)
2245 {
2246         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2247 }
2248
2249 /*********************************************************************
2250  *
2251  *  Queue management functions
2252  *
2253  **********************************************************************/
2254
2255 static void __attribute__((cold))
2256 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2257 {
2258         unsigned i;
2259
2260         if (txq->sw_ring != NULL) {
2261                 for (i = 0; i < txq->nb_tx_desc; i++) {
2262                         if (txq->sw_ring[i].mbuf != NULL) {
2263                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2264                                 txq->sw_ring[i].mbuf = NULL;
2265                         }
2266                 }
2267         }
2268 }
2269
2270 static void __attribute__((cold))
2271 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2272 {
2273         if (txq != NULL &&
2274             txq->sw_ring != NULL)
2275                 rte_free(txq->sw_ring);
2276 }
2277
2278 static void __attribute__((cold))
2279 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2280 {
2281         if (txq != NULL && txq->ops != NULL) {
2282                 txq->ops->release_mbufs(txq);
2283                 txq->ops->free_swring(txq);
2284                 rte_free(txq);
2285         }
2286 }
2287
2288 void __attribute__((cold))
2289 ixgbe_dev_tx_queue_release(void *txq)
2290 {
2291         ixgbe_tx_queue_release(txq);
2292 }
2293
2294 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2295 static void __attribute__((cold))
2296 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2297 {
2298         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2299         struct ixgbe_tx_entry *txe = txq->sw_ring;
2300         uint16_t prev, i;
2301
2302         /* Zero out HW ring memory */
2303         for (i = 0; i < txq->nb_tx_desc; i++) {
2304                 txq->tx_ring[i] = zeroed_desc;
2305         }
2306
2307         /* Initialize SW ring entries */
2308         prev = (uint16_t) (txq->nb_tx_desc - 1);
2309         for (i = 0; i < txq->nb_tx_desc; i++) {
2310                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2311
2312                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2313                 txe[i].mbuf = NULL;
2314                 txe[i].last_id = i;
2315                 txe[prev].next_id = i;
2316                 prev = i;
2317         }
2318
2319         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2320         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2321
2322         txq->tx_tail = 0;
2323         txq->nb_tx_used = 0;
2324         /*
2325          * Always allow 1 descriptor to be un-allocated to avoid
2326          * a H/W race condition
2327          */
2328         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2329         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2330         txq->ctx_curr = 0;
2331         memset((void *)&txq->ctx_cache, 0,
2332                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2333 }
2334
2335 static const struct ixgbe_txq_ops def_txq_ops = {
2336         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2337         .free_swring = ixgbe_tx_free_swring,
2338         .reset = ixgbe_reset_tx_queue,
2339 };
2340
2341 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2342  * the queue parameters. Used in tx_queue_setup by primary process and then
2343  * in dev_init by secondary process when attaching to an existing ethdev.
2344  */
2345 void __attribute__((cold))
2346 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2347 {
2348         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2349         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2350                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2351                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2352                 dev->tx_pkt_prepare = NULL;
2353 #ifdef RTE_IXGBE_INC_VECTOR
2354                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2355                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2356                                         ixgbe_txq_vec_setup(txq) == 0)) {
2357                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2358                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2359                 } else
2360 #endif
2361                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2362         } else {
2363                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2364                 PMD_INIT_LOG(DEBUG,
2365                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2366                                 (unsigned long)txq->txq_flags,
2367                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2368                 PMD_INIT_LOG(DEBUG,
2369                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2370                                 (unsigned long)txq->tx_rs_thresh,
2371                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2372                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2373                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2374         }
2375 }
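
/*
 * Illustrative sketch (editor-added): a queue configuration that is expected
 * to select the simple (and, where available, vector) Tx path chosen by
 * ixgbe_set_tx_function() above, assuming IXGBE_SIMPLE_FLAGS corresponds to
 * the no-multi-segment/no-offload txq_flags and RTE_PMD_IXGBE_TX_MAX_BURST
 * is 32. Port, queue and ring-size values are hypothetical.
 */
static inline int
example_setup_simple_txq(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf = {
                .tx_rs_thresh = 32,     /* >= RTE_PMD_IXGBE_TX_MAX_BURST */
                .tx_free_thresh = 32,
                .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS,
        };

        return rte_eth_tx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id), &txconf);
}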
2376
2377 int __attribute__((cold))
2378 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2379                          uint16_t queue_idx,
2380                          uint16_t nb_desc,
2381                          unsigned int socket_id,
2382                          const struct rte_eth_txconf *tx_conf)
2383 {
2384         const struct rte_memzone *tz;
2385         struct ixgbe_tx_queue *txq;
2386         struct ixgbe_hw     *hw;
2387         uint16_t tx_rs_thresh, tx_free_thresh;
2388
2389         PMD_INIT_FUNC_TRACE();
2390         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2391
2392         /*
2393          * Validate number of transmit descriptors.
2394          * It must be within the hardware minimum/maximum ring sizes and a
2395          * multiple of IXGBE_TXD_ALIGN.
2396          */
2397         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2398                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2399                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2400                 return -EINVAL;
2401         }
2402
2403         /*
2404          * The following two parameters control the setting of the RS bit on
2405          * transmit descriptors.
2406          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2407          * descriptors have been used.
2408          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2409          * descriptors are used or if the number of descriptors required
2410          * to transmit a packet is greater than the number of free TX
2411          * descriptors.
2412          * The following constraints must be satisfied:
2413          *  tx_rs_thresh must be greater than 0.
2414          *  tx_rs_thresh must be less than the size of the ring minus 2.
2415          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2416          *  tx_rs_thresh must be a divisor of the ring size.
2417          *  tx_free_thresh must be greater than 0.
2418          *  tx_free_thresh must be less than the size of the ring minus 3.
2419          * One descriptor in the TX ring is used as a sentinel to avoid a
2420          * H/W race condition, hence the maximum threshold constraints.
2421          * When set to zero use default values.
2422          */
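        /*
         * Illustrative example (values chosen for this comment, not taken
         * from the defaults below): nb_desc = 512 with tx_rs_thresh = 32 and
         * tx_free_thresh = 64 satisfies every constraint above: 32 > 0,
         * 32 < 510, 32 <= 64, 512 % 32 == 0, 64 > 0 and 64 < 509.  Note that
         * the code below additionally caps tx_rs_thresh at
         * DEFAULT_TX_RS_THRESH.
         */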
2423         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2424                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2425         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2426                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2427         if (tx_rs_thresh >= (nb_desc - 2)) {
2428                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2429                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2430                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2431                         (int)dev->data->port_id, (int)queue_idx);
2432                 return -(EINVAL);
2433         }
2434         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2435         PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2436                         "(tx_rs_thresh=%u port=%d queue=%d)",
2437                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2438                         (int)dev->data->port_id, (int)queue_idx);
2439                 return -(EINVAL);
2440         }
2441         if (tx_free_thresh >= (nb_desc - 3)) {
2442                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2443                              "number of TX descriptors minus 3. "
2444                              "(tx_free_thresh=%u "
2445                              "port=%d queue=%d)",
2446                              (unsigned int)tx_free_thresh,
2447                              (int)dev->data->port_id, (int)queue_idx);
2448                 return -(EINVAL);
2449         }
2450         if (tx_rs_thresh > tx_free_thresh) {
2451                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2452                              "tx_free_thresh. (tx_free_thresh=%u "
2453                              "tx_rs_thresh=%u port=%d queue=%d)",
2454                              (unsigned int)tx_free_thresh,
2455                              (unsigned int)tx_rs_thresh,
2456                              (int)dev->data->port_id,
2457                              (int)queue_idx);
2458                 return -(EINVAL);
2459         }
2460         if ((nb_desc % tx_rs_thresh) != 0) {
2461                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2462                              "number of TX descriptors. (tx_rs_thresh=%u "
2463                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2464                              (int)dev->data->port_id, (int)queue_idx);
2465                 return -(EINVAL);
2466         }
2467
2468         /*
2469          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2470          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2471          * by the NIC and all descriptors are written back after the NIC
2472          * accumulates WTHRESH descriptors.
2473          */
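        /*
         * In practice this means a non-zero wthresh can only be combined
         * with tx_rs_thresh == 1; any larger tx_rs_thresh requires
         * tx_conf->tx_thresh.wthresh to be 0.
         */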
2474         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2475                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2476                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2477                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2478                              (int)dev->data->port_id, (int)queue_idx);
2479                 return -(EINVAL);
2480         }
2481
2482         /* Free memory prior to re-allocation if needed... */
2483         if (dev->data->tx_queues[queue_idx] != NULL) {
2484                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2485                 dev->data->tx_queues[queue_idx] = NULL;
2486         }
2487
2488         /* First allocate the tx queue data structure */
2489         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2490                                  RTE_CACHE_LINE_SIZE, socket_id);
2491         if (txq == NULL)
2492                 return -ENOMEM;
2493
2494         /*
2495          * Allocate TX ring hardware descriptors. A memzone large enough to
2496          * handle the maximum ring size is allocated in order to allow for
2497          * resizing in later calls to the queue setup function.
2498          */
2499         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2500                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2501                         IXGBE_ALIGN, socket_id);
2502         if (tz == NULL) {
2503                 ixgbe_tx_queue_release(txq);
2504                 return -ENOMEM;
2505         }
2506
2507         txq->nb_tx_desc = nb_desc;
2508         txq->tx_rs_thresh = tx_rs_thresh;
2509         txq->tx_free_thresh = tx_free_thresh;
2510         txq->pthresh = tx_conf->tx_thresh.pthresh;
2511         txq->hthresh = tx_conf->tx_thresh.hthresh;
2512         txq->wthresh = tx_conf->tx_thresh.wthresh;
2513         txq->queue_id = queue_idx;
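        /*
         * When SR-IOV is active the PF owns the default pool, so the hardware
         * register index of this queue is offset by that pool's first queue
         * (def_pool_q_idx); otherwise software and hardware indexes match.
         */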
2514         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2515                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2516         txq->port_id = dev->data->port_id;
2517         txq->txq_flags = tx_conf->txq_flags;
2518         txq->ops = &def_txq_ops;
2519         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2520
2521         /*
2522          * Use the VF tail register (VFTDT) when the device is a virtual function
2523          */
2524         if (hw->mac.type == ixgbe_mac_82599_vf ||
2525             hw->mac.type == ixgbe_mac_X540_vf ||
2526             hw->mac.type == ixgbe_mac_X550_vf ||
2527             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2528             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2529                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2530         else
2531                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2532
2533         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2534         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2535
2536         /* Allocate software ring */
2537         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2538                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2539                                 RTE_CACHE_LINE_SIZE, socket_id);
2540         if (txq->sw_ring == NULL) {
2541                 ixgbe_tx_queue_release(txq);
2542                 return -ENOMEM;
2543         }
2544         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2545                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2546
2547         /* set up vector or scalar TX function as appropriate */
2548         ixgbe_set_tx_function(dev, txq);
2549
2550         txq->ops->reset(txq);
2551
2552         dev->data->tx_queues[queue_idx] = txq;
2553
2554
2555         return 0;
2556 }
2557
2558 /**
2559  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2560  *
2561  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2562  * in the sw_rsc_ring is not set to NULL but rather points to the next
2563  * mbuf of this RSC aggregation (that has not been completed yet and still
2564  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2565  * free only the first "nb_segs" segments of the cluster explicitly by
2566  * calling rte_pktmbuf_free_seg() on each of them.
2567  *
2568  * @m scattered cluster head
2569  */
2570 static void __attribute__((cold))
2571 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2572 {
2573         uint8_t i, nb_segs = m->nb_segs;
2574         struct rte_mbuf *next_seg;
2575
2576         for (i = 0; i < nb_segs; i++) {
2577                 next_seg = m->next;
2578                 rte_pktmbuf_free_seg(m);
2579                 m = next_seg;
2580         }
2581 }
2582
2583 static void __attribute__((cold))
2584 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2585 {
2586         unsigned i;
2587
2588 #ifdef RTE_IXGBE_INC_VECTOR
2589         /* SSE Vector driver has a different way of releasing mbufs. */
2590         if (rxq->rx_using_sse) {
2591                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2592                 return;
2593         }
2594 #endif
2595
2596         if (rxq->sw_ring != NULL) {
2597                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2598                         if (rxq->sw_ring[i].mbuf != NULL) {
2599                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2600                                 rxq->sw_ring[i].mbuf = NULL;
2601                         }
2602                 }
2603                 if (rxq->rx_nb_avail) {
2604                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2605                                 struct rte_mbuf *mb;
2606
2607                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2608                                 rte_pktmbuf_free_seg(mb);
2609                         }
2610                         rxq->rx_nb_avail = 0;
2611                 }
2612         }
2613
2614         if (rxq->sw_sc_ring)
2615                 for (i = 0; i < rxq->nb_rx_desc; i++)
2616                         if (rxq->sw_sc_ring[i].fbuf) {
2617                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2618                                 rxq->sw_sc_ring[i].fbuf = NULL;
2619                         }
2620 }
2621
2622 static void __attribute__((cold))
2623 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2624 {
2625         if (rxq != NULL) {
2626                 ixgbe_rx_queue_release_mbufs(rxq);
2627                 rte_free(rxq->sw_ring);
2628                 rte_free(rxq->sw_sc_ring);
2629                 rte_free(rxq);
2630         }
2631 }
2632
2633 void __attribute__((cold))
2634 ixgbe_dev_rx_queue_release(void *rxq)
2635 {
2636         ixgbe_rx_queue_release(rxq);
2637 }
2638
2639 /*
2640  * Check if Rx Burst Bulk Alloc function can be used.
2641  * Return
2642  *        0: the preconditions are satisfied and the bulk allocation function
2643  *           can be used.
2644  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2645  *           function must be used.
2646  */
2647 static inline int __attribute__((cold))
2648 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2649 {
2650         int ret = 0;
2651
2652         /*
2653          * Make sure the following pre-conditions are satisfied:
2654          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2655          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2656          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2657          * Scattered packets are not supported.  This should be checked
2658          * outside of this function.
2659          */
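        /*
         * Example (illustrative numbers): nb_rx_desc = 512 with
         * rx_free_thresh = 64 satisfies all three conditions, provided
         * RTE_PMD_IXGBE_RX_MAX_BURST is not larger than 64.
         */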
2660         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2661                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2662                              "rxq->rx_free_thresh=%d, "
2663                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2664                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2665                 ret = -EINVAL;
2666         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2667                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2668                              "rxq->rx_free_thresh=%d, "
2669                              "rxq->nb_rx_desc=%d",
2670                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2671                 ret = -EINVAL;
2672         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2673                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2674                              "rxq->nb_rx_desc=%d, "
2675                              "rxq->rx_free_thresh=%d",
2676                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2677                 ret = -EINVAL;
2678         }
2679
2680         return ret;
2681 }
2682
2683 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2684 static void __attribute__((cold))
2685 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2686 {
2687         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2688         unsigned i;
2689         uint16_t len = rxq->nb_rx_desc;
2690
2691         /*
2692          * By default, the Rx queue setup function allocates enough memory for
2693          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2694          * extra memory at the end of the descriptor ring to be zero'd out.
2695          */
2696         if (adapter->rx_bulk_alloc_allowed)
2697                 /* zero out extra memory */
2698                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2699
2700         /*
2701          * Zero out HW ring memory. Zero out extra memory at the end of
2702          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2703          * reads extra memory as zeros.
2704          */
2705         for (i = 0; i < len; i++) {
2706                 rxq->rx_ring[i] = zeroed_desc;
2707         }
2708
2709         /*
2710          * initialize extra software ring entries. Space for these extra
2711          * entries is always allocated
2712          */
2713         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2714         for (i = rxq->nb_rx_desc; i < len; ++i) {
2715                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2716         }
2717
2718         rxq->rx_nb_avail = 0;
2719         rxq->rx_next_avail = 0;
2720         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2721         rxq->rx_tail = 0;
2722         rxq->nb_rx_hold = 0;
2723         rxq->pkt_first_seg = NULL;
2724         rxq->pkt_last_seg = NULL;
2725
2726 #ifdef RTE_IXGBE_INC_VECTOR
2727         rxq->rxrearm_start = 0;
2728         rxq->rxrearm_nb = 0;
2729 #endif
2730 }
2731
2732 int __attribute__((cold))
2733 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2734                          uint16_t queue_idx,
2735                          uint16_t nb_desc,
2736                          unsigned int socket_id,
2737                          const struct rte_eth_rxconf *rx_conf,
2738                          struct rte_mempool *mp)
2739 {
2740         const struct rte_memzone *rz;
2741         struct ixgbe_rx_queue *rxq;
2742         struct ixgbe_hw     *hw;
2743         uint16_t len;
2744         struct ixgbe_adapter *adapter =
2745                 (struct ixgbe_adapter *)dev->data->dev_private;
2746
2747         PMD_INIT_FUNC_TRACE();
2748         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2749
2750         /*
2751          * Validate number of receive descriptors.
2752          * It must be within the hardware minimum/maximum ring sizes and a
2753          * multiple of IXGBE_RXD_ALIGN.
2754          */
2755         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2756                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2757                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2758                 return -EINVAL;
2759         }
2760
2761         /* Free memory prior to re-allocation if needed... */
2762         if (dev->data->rx_queues[queue_idx] != NULL) {
2763                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2764                 dev->data->rx_queues[queue_idx] = NULL;
2765         }
2766
2767         /* First allocate the rx queue data structure */
2768         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2769                                  RTE_CACHE_LINE_SIZE, socket_id);
2770         if (rxq == NULL)
2771                 return -ENOMEM;
2772         rxq->mb_pool = mp;
2773         rxq->nb_rx_desc = nb_desc;
2774         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2775         rxq->queue_id = queue_idx;
2776         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2777                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2778         rxq->port_id = dev->data->port_id;
2779         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2780                                                         0 : ETHER_CRC_LEN);
2781         rxq->drop_en = rx_conf->rx_drop_en;
2782         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2783
2784         /*
2785          * The packet type in RX descriptor is different for different NICs.
2786          * Some bits are used for X550 but reserved for other NICs,
2787          * so a different packet type mask is selected per NIC family.
2788          */
2789         if (hw->mac.type == ixgbe_mac_X550 ||
2790             hw->mac.type == ixgbe_mac_X550EM_x ||
2791             hw->mac.type == ixgbe_mac_X550EM_a ||
2792             hw->mac.type == ixgbe_mac_X550_vf ||
2793             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2794             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2795                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2796         else
2797                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2798
2799         /*
2800          * Allocate RX ring hardware descriptors. A memzone large enough to
2801          * handle the maximum ring size is allocated in order to allow for
2802          * resizing in later calls to the queue setup function.
2803          */
2804         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2805                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2806         if (rz == NULL) {
2807                 ixgbe_rx_queue_release(rxq);
2808                 return -ENOMEM;
2809         }
2810
2811         /*
2812          * Zero init all the descriptors in the ring.
2813          */
2814         memset(rz->addr, 0, RX_RING_SZ);
2815
2816         /*
2817          * Use the VF ring registers (VFRDT/VFRDH) when the device is a virtual function
2818          */
2819         if (hw->mac.type == ixgbe_mac_82599_vf ||
2820             hw->mac.type == ixgbe_mac_X540_vf ||
2821             hw->mac.type == ixgbe_mac_X550_vf ||
2822             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2823             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2824                 rxq->rdt_reg_addr =
2825                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2826                 rxq->rdh_reg_addr =
2827                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2828         } else {
2829                 rxq->rdt_reg_addr =
2830                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2831                 rxq->rdh_reg_addr =
2832                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2833         }
2834
2835         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2836         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2837
2838         /*
2839          * Certain constraints must be met in order to use the bulk buffer
2840          * allocation Rx burst function. If any of the Rx queues doesn't meet
2841          * them, the feature is disabled for the whole port.
2842          */
2843         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2844                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2845                                     "preconditions - canceling the feature for "
2846                                     "the whole port[%d]",
2847                              rxq->queue_id, rxq->port_id);
2848                 adapter->rx_bulk_alloc_allowed = false;
2849         }
2850
2851         /*
2852          * Allocate software ring. Allow for space at the end of the
2853          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2854          * function does not access an invalid memory region.
2855          */
2856         len = nb_desc;
2857         if (adapter->rx_bulk_alloc_allowed)
2858                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2859
2860         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2861                                           sizeof(struct ixgbe_rx_entry) * len,
2862                                           RTE_CACHE_LINE_SIZE, socket_id);
2863         if (!rxq->sw_ring) {
2864                 ixgbe_rx_queue_release(rxq);
2865                 return -ENOMEM;
2866         }
2867
2868         /*
2869          * Always allocate even if it's not going to be needed in order to
2870          * simplify the code.
2871          *
2872          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2873          * be requested in ixgbe_dev_rx_init(), which is called later from
2874          * dev_start() flow.
2875          */
2876         rxq->sw_sc_ring =
2877                 rte_zmalloc_socket("rxq->sw_sc_ring",
2878                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2879                                    RTE_CACHE_LINE_SIZE, socket_id);
2880         if (!rxq->sw_sc_ring) {
2881                 ixgbe_rx_queue_release(rxq);
2882                 return -ENOMEM;
2883         }
2884
2885         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2886                             "dma_addr=0x%"PRIx64,
2887                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2888                      rxq->rx_ring_phys_addr);
2889
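        /*
         * Assumption behind this check: the vector Rx path indexes the ring
         * with power-of-two arithmetic, so a non-power-of-two ring size has
         * to fall back to the scalar Rx code.
         */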
2890         if (!rte_is_power_of_2(nb_desc)) {
2891                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2892                                     "preconditions - canceling the feature for "
2893                                     "the whole port[%d]",
2894                              rxq->queue_id, rxq->port_id);
2895                 adapter->rx_vec_allowed = false;
2896         } else
2897                 ixgbe_rxq_vec_setup(rxq);
2898
2899         dev->data->rx_queues[queue_idx] = rxq;
2900
2901         ixgbe_reset_rx_queue(adapter, rxq);
2902
2903         return 0;
2904 }
2905
2906 uint32_t
2907 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2908 {
2909 #define IXGBE_RXQ_SCAN_INTERVAL 4
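        /*
         * Walk the ring from rx_tail in steps of IXGBE_RXQ_SCAN_INTERVAL
         * descriptors and stop at the first descriptor the hardware has not
         * marked done (DD bit clear); the returned count is therefore a
         * multiple of the scan interval, not an exact figure.
         */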
2910         volatile union ixgbe_adv_rx_desc *rxdp;
2911         struct ixgbe_rx_queue *rxq;
2912         uint32_t desc = 0;
2913
2914         rxq = dev->data->rx_queues[rx_queue_id];
2915         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2916
2917         while ((desc < rxq->nb_rx_desc) &&
2918                 (rxdp->wb.upper.status_error &
2919                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2920                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2921                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2922                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2923                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2924                                 desc - rxq->nb_rx_desc]);
2925         }
2926
2927         return desc;
2928 }
2929
2930 int
2931 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2932 {
2933         volatile union ixgbe_adv_rx_desc *rxdp;
2934         struct ixgbe_rx_queue *rxq = rx_queue;
2935         uint32_t desc;
2936
2937         if (unlikely(offset >= rxq->nb_rx_desc))
2938                 return 0;
2939         desc = rxq->rx_tail + offset;
2940         if (desc >= rxq->nb_rx_desc)
2941                 desc -= rxq->nb_rx_desc;
2942
2943         rxdp = &rxq->rx_ring[desc];
2944         return !!(rxdp->wb.upper.status_error &
2945                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2946 }
2947
2948 void __attribute__((cold))
2949 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2950 {
2951         unsigned i;
2952         struct ixgbe_adapter *adapter =
2953                 (struct ixgbe_adapter *)dev->data->dev_private;
2954
2955         PMD_INIT_FUNC_TRACE();
2956
2957         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2958                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2959
2960                 if (txq != NULL) {
2961                         txq->ops->release_mbufs(txq);
2962                         txq->ops->reset(txq);
2963                 }
2964         }
2965
2966         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2967                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2968
2969                 if (rxq != NULL) {
2970                         ixgbe_rx_queue_release_mbufs(rxq);
2971                         ixgbe_reset_rx_queue(adapter, rxq);
2972                 }
2973         }
2974 }
2975
2976 void
2977 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2978 {
2979         unsigned i;
2980
2981         PMD_INIT_FUNC_TRACE();
2982
2983         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2984                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2985                 dev->data->rx_queues[i] = NULL;
2986         }
2987         dev->data->nb_rx_queues = 0;
2988
2989         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2990                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2991                 dev->data->tx_queues[i] = NULL;
2992         }
2993         dev->data->nb_tx_queues = 0;
2994 }
2995
2996 /*********************************************************************
2997  *
2998  *  Device RX/TX init functions
2999  *
3000  **********************************************************************/
3001
3002 /**
3003  * Receive Side Scaling (RSS)
3004  * See section 7.1.2.8 in the following document:
3005  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3006  *
3007  * Principles:
3008  * The source and destination IP addresses of the IP header and the source
3009  * and destination ports of TCP/UDP headers, if any, of received packets are
3010  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3011  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3012  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3013  * RSS output index which is used as the RX queue index where to store the
3014  * received packets.
3015  * The following output is supplied in the RX write-back descriptor:
3016  *     - 32-bit result of the Microsoft RSS hash function,
3017  *     - 4-bit RSS type field.
3018  */
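/*
 * Illustrative sketch of the lookup described above (not driver code): the
 * receive queue selected for a packet is essentially
 *
 *     queue = reta[rss_hash & 0x7F];    (7 LSBs select one of 128 entries)
 *
 * with the RETA contents programmed by ixgbe_rss_configure() below.
 */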
3019
3020 /*
3021  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3022  * Used as the default key.
3023  */
3024 static uint8_t rss_intel_key[40] = {
3025         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3026         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3027         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3028         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3029         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3030 };
3031
3032 static void
3033 ixgbe_rss_disable(struct rte_eth_dev *dev)
3034 {
3035         struct ixgbe_hw *hw;
3036         uint32_t mrqc;
3037         uint32_t mrqc_reg;
3038
3039         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3040         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3041         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3042         mrqc &= ~IXGBE_MRQC_RSSEN;
3043         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3044 }
3045
3046 static void
3047 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3048 {
3049         uint8_t  *hash_key;
3050         uint32_t mrqc;
3051         uint32_t rss_key;
3052         uint64_t rss_hf;
3053         uint16_t i;
3054         uint32_t mrqc_reg;
3055         uint32_t rssrk_reg;
3056
3057         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3058         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3059
3060         hash_key = rss_conf->rss_key;
3061         if (hash_key != NULL) {
3062                 /* Fill in RSS hash key */
3063                 for (i = 0; i < 10; i++) {
3064                         rss_key  = hash_key[(i * 4)];
3065                         rss_key |= hash_key[(i * 4) + 1] << 8;
3066                         rss_key |= hash_key[(i * 4) + 2] << 16;
3067                         rss_key |= hash_key[(i * 4) + 3] << 24;
3068                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3069                 }
3070         }
3071
3072         /* Set configured hashing protocols in MRQC register */
3073         rss_hf = rss_conf->rss_hf;
3074         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3075         if (rss_hf & ETH_RSS_IPV4)
3076                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3077         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3078                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3079         if (rss_hf & ETH_RSS_IPV6)
3080                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3081         if (rss_hf & ETH_RSS_IPV6_EX)
3082                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3083         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3084                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3085         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3086                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3087         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3088                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3089         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3090                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3091         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3092                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3093         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3094 }
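
/*
 * Illustrative application-side usage (a hedged sketch, not part of this
 * driver): the hash fields handled above are normally requested through
 * rte_eth_dev_configure(), e.g.
 *
 *     struct rte_eth_conf conf;
 *     memset(&conf, 0, sizeof(conf));
 *     conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
 *     conf.rx_adv_conf.rss_conf.rss_key = NULL;  (fall back to rss_intel_key)
 *     conf.rx_adv_conf.rss_conf.rss_hf =
 *             ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP;
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 *
 * ixgbe_rss_configure() below then passes that rss_conf to
 * ixgbe_hw_rss_hash_set() to program MRQC and the RSSRK key registers.
 */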
3095
3096 int
3097 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3098                           struct rte_eth_rss_conf *rss_conf)
3099 {
3100         struct ixgbe_hw *hw;
3101         uint32_t mrqc;
3102         uint64_t rss_hf;
3103         uint32_t mrqc_reg;
3104
3105         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3106
3107         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3108                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3109                         "NIC.");
3110                 return -ENOTSUP;
3111         }
3112         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3113
3114         /*
3115          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3116          *     "RSS enabling cannot be done dynamically while it must be
3117          *      preceded by a software reset"
3118          * Before changing anything, first check that the update RSS operation
3119          * does not attempt to disable RSS, if RSS was enabled at
3120          * initialization time, or does not attempt to enable RSS, if RSS was
3121          * disabled at initialization time.
3122          */
3123         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3124         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3125         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3126                 if (rss_hf != 0) /* Enable RSS */
3127                         return -(EINVAL);
3128                 return 0; /* Nothing to do */
3129         }
3130         /* RSS enabled */
3131         if (rss_hf == 0) /* Disable RSS */
3132                 return -(EINVAL);
3133         ixgbe_hw_rss_hash_set(hw, rss_conf);
3134         return 0;
3135 }
3136
3137 int
3138 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3139                             struct rte_eth_rss_conf *rss_conf)
3140 {
3141         struct ixgbe_hw *hw;
3142         uint8_t *hash_key;
3143         uint32_t mrqc;
3144         uint32_t rss_key;
3145         uint64_t rss_hf;
3146         uint16_t i;
3147         uint32_t mrqc_reg;
3148         uint32_t rssrk_reg;
3149
3150         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3151         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3152         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3153         hash_key = rss_conf->rss_key;
3154         if (hash_key != NULL) {
3155                 /* Return RSS hash key */
3156                 for (i = 0; i < 10; i++) {
3157                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3158                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3159                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3160                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3161                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3162                 }
3163         }
3164
3165         /* Get RSS functions configured in MRQC register */
3166         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3167         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3168                 rss_conf->rss_hf = 0;
3169                 return 0;
3170         }
3171         rss_hf = 0;
3172         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3173                 rss_hf |= ETH_RSS_IPV4;
3174         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3175                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3176         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3177                 rss_hf |= ETH_RSS_IPV6;
3178         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3179                 rss_hf |= ETH_RSS_IPV6_EX;
3180         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3181                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3182         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3183                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3184         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3185                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3186         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3187                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3188         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3189                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3190         rss_conf->rss_hf = rss_hf;
3191         return 0;
3192 }
3193
3194 static void
3195 ixgbe_rss_configure(struct rte_eth_dev *dev)
3196 {
3197         struct rte_eth_rss_conf rss_conf;
3198         struct ixgbe_hw *hw;
3199         uint32_t reta;
3200         uint16_t i;
3201         uint16_t j;
3202         uint16_t sp_reta_size;
3203         uint32_t reta_reg;
3204
3205         PMD_INIT_FUNC_TRACE();
3206         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3207
3208         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3209
3210         /*
3211          * Fill in redirection table
3212          * The byte-swap is needed because NIC registers are in
3213          * little-endian order.
3214          */
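        /*
         * Four consecutive one-byte RETA entries are accumulated in 'reta'
         * and flushed to a 32-bit register on every fourth iteration
         * ((i & 3) == 3); queue indexes are assigned round-robin over the
         * configured Rx queues.
         */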
3215         reta = 0;
3216         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3217                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3218
3219                 if (j == dev->data->nb_rx_queues)
3220                         j = 0;
3221                 reta = (reta << 8) | j;
3222                 if ((i & 3) == 3)
3223                         IXGBE_WRITE_REG(hw, reta_reg,
3224                                         rte_bswap32(reta));
3225         }
3226
3227         /*
3228          * Configure the RSS key and the RSS protocols used to compute
3229          * the RSS hash of input packets.
3230          */
3231         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3232         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3233                 ixgbe_rss_disable(dev);
3234                 return;
3235         }
3236         if (rss_conf.rss_key == NULL)
3237                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3238         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3239 }
3240
3241 #define NUM_VFTA_REGISTERS 128
3242 #define NIC_RX_BUFFER_SIZE 0x200
3243 #define X550_RX_BUFFER_SIZE 0x180
3244
3245 static void
3246 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3247 {
3248         struct rte_eth_vmdq_dcb_conf *cfg;
3249         struct ixgbe_hw *hw;
3250         enum rte_eth_nb_pools num_pools;
3251         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3252         uint16_t pbsize;
3253         uint8_t nb_tcs; /* number of traffic classes */
3254         int i;
3255
3256         PMD_INIT_FUNC_TRACE();
3257         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3258         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3259         num_pools = cfg->nb_queue_pools;
3260         /* Check we have a valid number of pools */
3261         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3262                 ixgbe_rss_disable(dev);
3263                 return;
3264         }
3265         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3266         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3267
3268         /*
3269          * RXPBSIZE
3270          * split rx buffer up into sections, each for 1 traffic class
3271          */
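        /*
         * Example (assuming the 1 KB RXPBSIZE unit): with 8 traffic classes,
         * NIC_RX_BUFFER_SIZE (0x200) gives each TC a 64 KB section, while
         * X550_RX_BUFFER_SIZE (0x180) gives each TC 48 KB.
         */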
3272         switch (hw->mac.type) {
3273         case ixgbe_mac_X550:
3274         case ixgbe_mac_X550EM_x:
3275         case ixgbe_mac_X550EM_a:
3276                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3277                 break;
3278         default:
3279                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3280                 break;
3281         }
3282         for (i = 0; i < nb_tcs; i++) {
3283                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3284
3285                 /* clear the 10-bit RXPBSIZE field before setting the new value */
3286                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3287                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3288                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3289         }
3290         /* zero alloc all unused TCs */
3291         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3292                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3293
3294                 /* clear the 10-bit RXPBSIZE field (unused TCs get zero) */
3295                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3296                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3297         }
3298
3299         /* MRQC: enable vmdq and dcb */
3300         mrqc = (num_pools == ETH_16_POOLS) ?
3301                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3302         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3303
3304         /* PFVTCTL: turn on virtualisation and set the default pool */
3305         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3306         if (cfg->enable_default_pool) {
3307                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3308         } else {
3309                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3310         }
3311
3312         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3313
3314         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3315         queue_mapping = 0;
3316         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3317                 /*
3318                  * mapping is done with 3 bits per priority,
3319                  * so shift by i*3 each time
3320                  */
3321                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3322
3323         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3324
3325         /* RTRPCS: DCB related */
3326         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3327
3328         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3329         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3330         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3331         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3332
3333         /* VFTA - enable all vlan filters */
3334         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3335                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3336         }
3337
3338         /* VFRE: pool enabling for receive - 16 or 32 */
3339         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3340                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3341
3342         /*
3343          * MPSAR - allow pools to read specific mac addresses
3344          * In this case, all pools should be able to read from mac addr 0
3345          */
3346         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3347         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3348
3349         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3350         for (i = 0; i < cfg->nb_pool_maps; i++) {
3351                 /* set vlan id in VF register and set the valid bit */
3352                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3353                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3354                 /*
3355                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3356                  * pools, we only need to use the first half of the register
3357                  * i.e. bits 0-31
3358                  */
3359                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3360         }
3361 }
3362
3363 /**
3364  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3365  * @dev: pointer to eth_dev structure
3366  * @dcb_config: pointer to ixgbe_dcb_config structure
3367  */
3368 static void
3369 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3370                        struct ixgbe_dcb_config *dcb_config)
3371 {
3372         uint32_t reg;
3373         uint32_t q;
3374         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3375
3376         PMD_INIT_FUNC_TRACE();
3377         if (hw->mac.type != ixgbe_mac_82598EB) {
3378                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3379                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3380                 reg |= IXGBE_RTTDCS_ARBDIS;
3381                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3382
3383                 /* Enable DCB for Tx with 8 TCs */
3384                 if (dcb_config->num_tcs.pg_tcs == 8) {
3385                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3386                 } else {
3387                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3388                 }
3389                 if (dcb_config->vt_mode)
3390                         reg |= IXGBE_MTQC_VT_ENA;
3391                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3392
3393                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3394                         /* Disable drop for all queues in VMDQ mode*/
3395                         for (q = 0; q < 128; q++)
3396                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3397                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3398                 } else {
3399                         /* Enable drop for all queues in SRIOV mode */
3400                         for (q = 0; q < 128; q++)
3401                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3402                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3403                 }
3404
3405                 /* Enable the Tx desc arbiter */
3406                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3407                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3408                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3409
3410                 /* Enable Security TX Buffer IFG for DCB */
3411                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3412                 reg |= IXGBE_SECTX_DCB;
3413                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3414         }
3415 }
3416
3417 /**
3418  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3419  * @dev: pointer to rte_eth_dev structure
3420  * @dcb_config: pointer to ixgbe_dcb_config structure
3421  */
3422 static void
3423 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3424                         struct ixgbe_dcb_config *dcb_config)
3425 {
3426         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3427                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3428         struct ixgbe_hw *hw =
3429                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3430
3431         PMD_INIT_FUNC_TRACE();
3432         if (hw->mac.type != ixgbe_mac_82598EB)
3433                 /*PF VF Transmit Enable*/
3434                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3435                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3436
3437         /*Configure general DCB TX parameters*/
3438         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3439 }
3440
3441 static void
3442 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3443                         struct ixgbe_dcb_config *dcb_config)
3444 {
3445         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3446                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3447         struct ixgbe_dcb_tc_config *tc;
3448         uint8_t i, j;
3449
3450         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3451         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3452                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3453                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3454         } else {
3455                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3456                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3457         }
3458         /* User Priority to Traffic Class mapping */
3459         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3460                 j = vmdq_rx_conf->dcb_tc[i];
3461                 tc = &dcb_config->tc_config[j];
3462                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3463                                                 (uint8_t)(1 << j);
3464         }
3465 }
3466
3467 static void
3468 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3469                         struct ixgbe_dcb_config *dcb_config)
3470 {
3471         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3472                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3473         struct ixgbe_dcb_tc_config *tc;
3474         uint8_t i, j;
3475
3476         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3477         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3478                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3479                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3480         } else {
3481                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3482                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3483         }
3484
3485         /* User Priority to Traffic Class mapping */
3486         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3487                 j = vmdq_tx_conf->dcb_tc[i];
3488                 tc = &dcb_config->tc_config[j];
3489                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3490                                                 (uint8_t)(1 << j);
3491         }
3492 }
3493
3494 static void
3495 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3496                 struct ixgbe_dcb_config *dcb_config)
3497 {
3498         struct rte_eth_dcb_rx_conf *rx_conf =
3499                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3500         struct ixgbe_dcb_tc_config *tc;
3501         uint8_t i, j;
3502
3503         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3504         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3505
3506         /* User Priority to Traffic Class mapping */
3507         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3508                 j = rx_conf->dcb_tc[i];
3509                 tc = &dcb_config->tc_config[j];
3510                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3511                                                 (uint8_t)(1 << j);
3512         }
3513 }
3514
3515 static void
3516 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3517                 struct ixgbe_dcb_config *dcb_config)
3518 {
3519         struct rte_eth_dcb_tx_conf *tx_conf =
3520                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3521         struct ixgbe_dcb_tc_config *tc;
3522         uint8_t i, j;
3523
3524         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3525         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3526
3527         /* User Priority to Traffic Class mapping */
3528         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3529                 j = tx_conf->dcb_tc[i];
3530                 tc = &dcb_config->tc_config[j];
3531                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3532                                                 (uint8_t)(1 << j);
3533         }
3534 }
3535
3536 /**
3537  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3538  * @hw: pointer to hardware structure
3539  * @dcb_config: pointer to ixgbe_dcb_config structure
3540  */
3541 static void
3542 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3543                struct ixgbe_dcb_config *dcb_config)
3544 {
3545         uint32_t reg;
3546         uint32_t vlanctrl;
3547         uint8_t i;
3548
3549         PMD_INIT_FUNC_TRACE();
3550         /*
3551          * Disable the arbiter before changing parameters
3552          * (always enable recycle mode; WSP)
3553          */
3554         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3555         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3556
3557         if (hw->mac.type != ixgbe_mac_82598EB) {
3558                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3559                 if (dcb_config->num_tcs.pg_tcs == 4) {
3560                         if (dcb_config->vt_mode)
3561                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3562                                         IXGBE_MRQC_VMDQRT4TCEN;
3563                         else {
3564                                 /* no matter the mode is DCB or DCB_RSS, just
3565                                  * set the MRQE to RSSXTCEN. RSS is controlled
3566                                  * by RSS_FIELD
3567                                  */
3568                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3569                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3570                                         IXGBE_MRQC_RTRSS4TCEN;
3571                         }
3572                 }
3573                 if (dcb_config->num_tcs.pg_tcs == 8) {
3574                         if (dcb_config->vt_mode)
3575                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3576                                         IXGBE_MRQC_VMDQRT8TCEN;
3577                         else {
3578                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3579                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3580                                         IXGBE_MRQC_RTRSS8TCEN;
3581                         }
3582                 }
3583
3584                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3585         }
3586
3587         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3588         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3589         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3590         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3591
3592         /* VFTA - enable all vlan filters */
3593         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3594                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3595         }
3596
3597         /*
3598          * Configure Rx packet plane (recycle mode; WSP) and
3599          * enable arbiter
3600          */
3601         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3602         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3603 }
3604
3605 static void
3606 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3607                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3608 {
3609         switch (hw->mac.type) {
3610         case ixgbe_mac_82598EB:
3611                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3612                 break;
3613         case ixgbe_mac_82599EB:
3614         case ixgbe_mac_X540:
3615         case ixgbe_mac_X550:
3616         case ixgbe_mac_X550EM_x:
3617         case ixgbe_mac_X550EM_a:
3618                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3619                                                   tsa, map);
3620                 break;
3621         default:
3622                 break;
3623         }
3624 }
3625
3626 static void
3627 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3628                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3629 {
3630         switch (hw->mac.type) {
3631         case ixgbe_mac_82598EB:
3632                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3633                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3634                 break;
3635         case ixgbe_mac_82599EB:
3636         case ixgbe_mac_X540:
3637         case ixgbe_mac_X550:
3638         case ixgbe_mac_X550EM_x:
3639         case ixgbe_mac_X550EM_a:
3640                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3641                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3642                 break;
3643         default:
3644                 break;
3645         }
3646 }
3647
3648 #define DCB_RX_CONFIG  1
3649 #define DCB_TX_CONFIG  1
3650 #define DCB_TX_PB      1024
3651 /**
3652  * ixgbe_dcb_hw_configure - Enable DCB and configure
3653  * general DCB in VT mode and non-VT mode parameters
3654  * @dev: pointer to rte_eth_dev structure
3655  * @dcb_config: pointer to ixgbe_dcb_config structure
3656  */
3657 static int
3658 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3659                         struct ixgbe_dcb_config *dcb_config)
3660 {
3661         int     ret = 0;
3662         uint8_t i, pfc_en, nb_tcs;
3663         uint16_t pbsize, rx_buffer_size;
3664         uint8_t config_dcb_rx = 0;
3665         uint8_t config_dcb_tx = 0;
3666         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3667         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3668         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3669         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3670         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3671         struct ixgbe_dcb_tc_config *tc;
3672         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3673         struct ixgbe_hw *hw =
3674                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3675
3676         switch (dev->data->dev_conf.rxmode.mq_mode) {
3677         case ETH_MQ_RX_VMDQ_DCB:
3678                 dcb_config->vt_mode = true;
3679                 if (hw->mac.type != ixgbe_mac_82598EB) {
3680                         config_dcb_rx = DCB_RX_CONFIG;
3681                         /*
3682                          * get DCB and VT RX configuration parameters
3683                          * from rte_eth_conf
3684                          */
3685                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3686                         /*Configure general VMDQ and DCB RX parameters*/
3687                         ixgbe_vmdq_dcb_configure(dev);
3688                 }
3689                 break;
3690         case ETH_MQ_RX_DCB:
3691         case ETH_MQ_RX_DCB_RSS:
3692                 dcb_config->vt_mode = false;
3693                 config_dcb_rx = DCB_RX_CONFIG;
3694                 /* Get DCB RX configuration parameters from rte_eth_conf */
3695                 ixgbe_dcb_rx_config(dev, dcb_config);
3696                 /*Configure general DCB RX parameters*/
3697                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3698                 break;
3699         default:
3700                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3701                 break;
3702         }
3703         switch (dev->data->dev_conf.txmode.mq_mode) {
3704         case ETH_MQ_TX_VMDQ_DCB:
3705                 dcb_config->vt_mode = true;
3706                 config_dcb_tx = DCB_TX_CONFIG;
3707                 /* get DCB and VT TX configuration parameters
3708                  * from rte_eth_conf
3709                  */
3710                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3711                 /* Configure general VMDq and DCB Tx parameters */
3712                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3713                 break;
3714
3715         case ETH_MQ_TX_DCB:
3716                 dcb_config->vt_mode = false;
3717                 config_dcb_tx = DCB_TX_CONFIG;
3718                 /* Get DCB Tx configuration parameters from rte_eth_conf */
3719                 ixgbe_dcb_tx_config(dev, dcb_config);
3720                 /* Configure general DCB Tx parameters */
3721                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3722                 break;
3723         default:
3724                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3725                 break;
3726         }
3727
3728         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3729         /* Unpack map */
3730         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3731         if (nb_tcs == ETH_4_TCS) {
3732                 /* Avoid un-configured priority mapping to TC0 */
3733                 uint8_t j = 4;
3734                 uint8_t mask = 0xFF;
3735
3736                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3737                         mask = (uint8_t)(mask & (~(1 << map[i])));
3738                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3739                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3740                                 map[j++] = i;
3741                         mask >>= 1;
3742                 }
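                /*
                 * Worked example (illustrative only): if the application maps
                 * user priorities 0-3 to TCs 0-3, the first loop clears bits
                 * 0-3 of 'mask', leaving 0xF0. The second loop then assigns
                 * the remaining priorities 4-7 to the otherwise unused TCs
                 * 4-7 (which get zero bandwidth below) instead of letting
                 * them all fall back to TC0.
                 */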
3743                 /* Re-configure 4 TCs BW */
3744                 for (i = 0; i < nb_tcs; i++) {
3745                         tc = &dcb_config->tc_config[i];
3746                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3747                                                 (uint8_t)(100 / nb_tcs);
3748                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3749                                                 (uint8_t)(100 / nb_tcs);
3750                 }
3751                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3752                         tc = &dcb_config->tc_config[i];
3753                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3754                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3755                 }
3756         }
3757
3758         switch (hw->mac.type) {
3759         case ixgbe_mac_X550:
3760         case ixgbe_mac_X550EM_x:
3761         case ixgbe_mac_X550EM_a:
3762                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3763                 break;
3764         default:
3765                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3766                 break;
3767         }
3768
3769         if (config_dcb_rx) {
3770                 /* Set RX buffer size */
3771                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3772                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3773
3774                 for (i = 0; i < nb_tcs; i++) {
3775                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3776                 }
3777                 /* zero alloc all unused TCs */
3778                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3779                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3780                 }
3781         }
3782         if (config_dcb_tx) {
3783                 /* Only an equally distributed Tx packet buffer
3784                  * strategy is supported.
3785                  */
3786                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3787                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3788
3789                 for (i = 0; i < nb_tcs; i++) {
3790                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3791                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3792                 }
3793                 /* Clear unused TCs, if any, to zero buffer size */
3794                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3795                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3796                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3797                 }
3798         }
3799
3800         /* Calculate traffic class credits */
3801         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3802                                 IXGBE_DCB_TX_CONFIG);
3803         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3804                                 IXGBE_DCB_RX_CONFIG);
3805
3806         if (config_dcb_rx) {
3807                 /* Unpack CEE standard containers */
3808                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3809                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3810                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3811                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3812                 /* Configure PG(ETS) RX */
3813                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3814         }
3815
3816         if (config_dcb_tx) {
3817                 /* Unpack CEE standard containers */
3818                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3819                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3820                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3821                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3822                 /* Configure PG(ETS) TX */
3823                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3824         }
3825
3826         /* Configure queue statistics registers */
3827         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3828
3829         /* Check if the PFC is supported */
3830         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3831                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3832                 for (i = 0; i < nb_tcs; i++) {
3833                         /*
3834                          * If the TC count is 8 and the default high_water is 48,
3835                          * the low_water defaults to 16.
3836                          */
3837                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3838                         hw->fc.low_water[i] = pbsize / 4;
3839                         /* Enable pfc for this TC */
3840                         tc = &dcb_config->tc_config[i];
3841                         tc->pfc = ixgbe_dcb_pfc_enabled;
3842                 }
3843                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3844                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3845                         pfc_en &= 0x0F;
3846                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3847         }
3848
3849         return ret;
3850 }
3851
3852 /**
3853  * ixgbe_configure_dcb - Configure DCB hardware
3854  * @dev: pointer to rte_eth_dev
3855  */
3856 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3857 {
3858         struct ixgbe_dcb_config *dcb_cfg =
3859                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3860         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3861
3862         PMD_INIT_FUNC_TRACE();
3863
3864         /* Check whether the mq_mode is supported for DCB */
3865         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3866             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3867             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3868                 return;
3869
3870         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3871                 return;
3872
3873         /* Configure DCB hardware */
3874         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3875 }
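/*
 * Illustrative sketch (not part of the driver): a minimal application-side
 * configuration that takes the ETH_MQ_RX_DCB path above. Field names follow
 * the ethdev DCB configuration of this DPDK generation; the concrete values
 * are an assumption and would normally come from the application's policy.
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_DCB },
 *             .txmode = { .mq_mode = ETH_MQ_TX_DCB },
 *             .rx_adv_conf.dcb_rx_conf = { .nb_tcs = ETH_4_TCS },
 *             .tx_adv_conf.dcb_tx_conf = { .nb_tcs = ETH_4_TCS },
 *             .dcb_capability_en = ETH_DCB_PFC_SUPPORT,
 *     };
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *
 * With ETH_DCB_PFC_SUPPORT set, ixgbe_dcb_hw_configure() also programs the
 * per-TC flow control watermarks as shown above.
 */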
3876
3877 /*
3878  * VMDq is only supported on 10 GbE NICs.
3879  */
3880 static void
3881 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3882 {
3883         struct rte_eth_vmdq_rx_conf *cfg;
3884         struct ixgbe_hw *hw;
3885         enum rte_eth_nb_pools num_pools;
3886         uint32_t mrqc, vt_ctl, vlanctrl;
3887         uint32_t vmolr = 0;
3888         int i;
3889
3890         PMD_INIT_FUNC_TRACE();
3891         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3892         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3893         num_pools = cfg->nb_queue_pools;
3894
3895         ixgbe_rss_disable(dev);
3896
3897         /* MRQC: enable vmdq */
3898         mrqc = IXGBE_MRQC_VMDQEN;
3899         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3900
3901         /* PFVTCTL: turn on virtualisation and set the default pool */
3902         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3903         if (cfg->enable_default_pool)
3904                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3905         else
3906                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3907
3908         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3909
3910         for (i = 0; i < (int)num_pools; i++) {
3911                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3912                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3913         }
3914
3915         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3916         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3917         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3918         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3919
3920         /* VFTA - enable all vlan filters */
3921         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3922                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3923
3924         /* VFRE: pool enabling for receive - 64 */
3925         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3926         if (num_pools == ETH_64_POOLS)
3927                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3928
3929         /*
3930          * MPSAR - allow pools to read specific mac addresses
3931          * In this case, all pools should be able to read from mac addr 0
3932          */
3933         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3934         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3935
3936         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3937         for (i = 0; i < cfg->nb_pool_maps; i++) {
3938                 /* set vlan id in VF register and set the valid bit */
3939                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3940                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3941                 /*
3942                  * Put the allowed pools in the VLVFB register. As we only have
3943                  * 16 or 64 pools, at most one 32-bit half of the 64-bit pool
3944                  * bitmap is written here, i.e. bits 0-31 or bits 32-63.
3945                  */
3946                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3947                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3948                                         (cfg->pool_map[i].pools & UINT32_MAX));
3949                 else
3950                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3951                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3952
3953         }
3954
3955         /* PFDTXGSWC (PF DMA Tx General Switch Control): enable VMDq loopback */
3956         if (cfg->enable_loop_back) {
3957                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3958                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3959                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3960         }
3961
3962         IXGBE_WRITE_FLUSH(hw);
3963 }
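/*
 * Illustrative sketch (not part of the driver): the rte_eth_vmdq_rx_conf
 * fields consumed above could be filled by an application roughly as
 * follows; the VLAN IDs and pool assignments are made-up example values.
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = ETH_MQ_RX_VMDQ_ONLY },
 *             .rx_adv_conf.vmdq_rx_conf = {
 *                     .nb_queue_pools = ETH_64_POOLS,
 *                     .enable_default_pool = 0,
 *                     .nb_pool_maps = 2,
 *                     .pool_map = {
 *                             { .vlan_id = 100, .pools = 1ULL << 0 },
 *                             { .vlan_id = 200, .pools = 1ULL << 1 },
 *                     },
 *                     .enable_loop_back = 0,
 *             },
 *     };
 *
 * ixgbe_dev_mq_rx_configure() below dispatches to this function when
 * ETH_MQ_RX_VMDQ_ONLY is selected and SR-IOV is inactive.
 */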
3964
3965 /*
3966  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq Tx parameters
3967  * @hw: pointer to hardware structure
3968  */
3969 static void
3970 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3971 {
3972         uint32_t reg;
3973         uint32_t q;
3974
3975         PMD_INIT_FUNC_TRACE();
3976         /* PF/VF Transmit Enable */
3977         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3978         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3979
3980         /* Disable the Tx desc arbiter so that MTQC can be changed */
3981         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3982         reg |= IXGBE_RTTDCS_ARBDIS;
3983         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3984
3985         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3986         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3987
3988         /* Disable drop for all queues */
3989         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3990                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3991                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3992
3993         /* Enable the Tx desc arbiter */
3994         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3995         reg &= ~IXGBE_RTTDCS_ARBDIS;
3996         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3997
3998         IXGBE_WRITE_FLUSH(hw);
3999 }
4000
4001 static int __attribute__((cold))
4002 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4003 {
4004         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4005         uint64_t dma_addr;
4006         unsigned int i;
4007
4008         /* Initialize software ring entries */
4009         for (i = 0; i < rxq->nb_rx_desc; i++) {
4010                 volatile union ixgbe_adv_rx_desc *rxd;
4011                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4012
4013                 if (mbuf == NULL) {
4014                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4015                                      (unsigned) rxq->queue_id);
4016                         return -ENOMEM;
4017                 }
4018
4019                 rte_mbuf_refcnt_set(mbuf, 1);
4020                 mbuf->next = NULL;
4021                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4022                 mbuf->nb_segs = 1;
4023                 mbuf->port = rxq->port_id;
4024
4025                 dma_addr =
4026                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4027                 rxd = &rxq->rx_ring[i];
4028                 rxd->read.hdr_addr = 0;
4029                 rxd->read.pkt_addr = dma_addr;
4030                 rxe[i].mbuf = mbuf;
4031         }
4032
4033         return 0;
4034 }
4035
4036 static int
4037 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4038 {
4039         struct ixgbe_hw *hw;
4040         uint32_t mrqc;
4041
4042         ixgbe_rss_configure(dev);
4043
4044         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4045
4046         /* MRQC: enable VF RSS */
4047         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4048         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4049         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4050         case ETH_64_POOLS:
4051                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4052                 break;
4053
4054         case ETH_32_POOLS:
4055                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4056                 break;
4057
4058         default:
4059                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4060                 return -EINVAL;
4061         }
4062
4063         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4064
4065         return 0;
4066 }
4067
4068 static int
4069 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4070 {
4071         struct ixgbe_hw *hw =
4072                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4073
4074         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4075         case ETH_64_POOLS:
4076                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4077                         IXGBE_MRQC_VMDQEN);
4078                 break;
4079
4080         case ETH_32_POOLS:
4081                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4082                         IXGBE_MRQC_VMDQRT4TCEN);
4083                 break;
4084
4085         case ETH_16_POOLS:
4086                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4087                         IXGBE_MRQC_VMDQRT8TCEN);
4088                 break;
4089         default:
4090                 PMD_INIT_LOG(ERR,
4091                         "invalid pool number in IOV mode");
4092                 break;
4093         }
4094         return 0;
4095 }
4096
4097 static int
4098 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4099 {
4100         struct ixgbe_hw *hw =
4101                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4102
4103         if (hw->mac.type == ixgbe_mac_82598EB)
4104                 return 0;
4105
4106         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4107                 /*
4108                  * SRIOV inactive scheme
4109                  * any DCB/RSS w/o VMDq multi-queue setting
4110                  */
4111                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4112                 case ETH_MQ_RX_RSS:
4113                 case ETH_MQ_RX_DCB_RSS:
4114                 case ETH_MQ_RX_VMDQ_RSS:
4115                         ixgbe_rss_configure(dev);
4116                         break;
4117
4118                 case ETH_MQ_RX_VMDQ_DCB:
4119                         ixgbe_vmdq_dcb_configure(dev);
4120                         break;
4121
4122                 case ETH_MQ_RX_VMDQ_ONLY:
4123                         ixgbe_vmdq_rx_hw_configure(dev);
4124                         break;
4125
4126                 case ETH_MQ_RX_NONE:
4127                 default:
4128                         /* If mq_mode is none, disable RSS. */
4129                         ixgbe_rss_disable(dev);
4130                         break;
4131                 }
4132         } else {
4133                 /*
4134                  * SRIOV active scheme
4135                  * Support RSS together with VMDq & SRIOV
4136                  */
4137                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4138                 case ETH_MQ_RX_RSS:
4139                 case ETH_MQ_RX_VMDQ_RSS:
4140                         ixgbe_config_vf_rss(dev);
4141                         break;
4142                 case ETH_MQ_RX_VMDQ_DCB:
4143                         ixgbe_vmdq_dcb_configure(dev);
4144                         break;
4145                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4146                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4147                         PMD_INIT_LOG(ERR,
4148                                 "Could not support DCB/RSS with VMDq & SRIOV");
4149                         return -1;
4150                 default:
4151                         ixgbe_config_vf_default(dev);
4152                         break;
4153                 }
4154         }
4155
4156         return 0;
4157 }
4158
4159 static int
4160 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4161 {
4162         struct ixgbe_hw *hw =
4163                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4164         uint32_t mtqc;
4165         uint32_t rttdcs;
4166
4167         if (hw->mac.type == ixgbe_mac_82598EB)
4168                 return 0;
4169
4170         /* disable arbiter before setting MTQC */
4171         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4172         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4173         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4174
4175         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4176                 /*
4177                  * SRIOV inactive scheme
4178                  * any DCB w/o VMDq multi-queue setting
4179                  */
4180                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4181                         ixgbe_vmdq_tx_hw_configure(hw);
4182                 else {
4183                         mtqc = IXGBE_MTQC_64Q_1PB;
4184                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4185                 }
4186         } else {
4187                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4188
4189                 /*
4190                  * SRIOV active scheme
4191                  * FIXME if support DCB together with VMDq & SRIOV
4192                  */
4193                 case ETH_64_POOLS:
4194                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4195                         break;
4196                 case ETH_32_POOLS:
4197                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4198                         break;
4199                 case ETH_16_POOLS:
4200                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4201                                 IXGBE_MTQC_8TC_8TQ;
4202                         break;
4203                 default:
4204                         mtqc = IXGBE_MTQC_64Q_1PB;
4205                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4206                 }
4207                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4208         }
4209
4210         /* re-enable arbiter */
4211         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4212         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4213
4214         return 0;
4215 }
4216
4217 /**
4218  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4219  *
4220  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4221  * spec rev. 3.0 chapter 8.2.3.8.13.
4222  *
4223  * @pool Memory pool of the Rx queue
4224  */
4225 static inline uint32_t
4226 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4227 {
4228         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4229
4230         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4231         uint16_t maxdesc =
4232                 IPV4_MAX_PKT_LEN /
4233                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4234
4235         if (maxdesc >= 16)
4236                 return IXGBE_RSCCTL_MAXDESC_16;
4237         else if (maxdesc >= 8)
4238                 return IXGBE_RSCCTL_MAXDESC_8;
4239         else if (maxdesc >= 4)
4240                 return IXGBE_RSCCTL_MAXDESC_4;
4241         else
4242                 return IXGBE_RSCCTL_MAXDESC_1;
4243 }
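/*
 * Worked example (illustrative only): for a mempool created with
 * RTE_MBUF_DEFAULT_BUF_SIZE, the data room is 2176 bytes, so after the
 * default 128-byte RTE_PKTMBUF_HEADROOM each buffer holds 2048 bytes and
 * 65535 / 2048 = 31, which is capped to IXGBE_RSCCTL_MAXDESC_16. Smaller
 * MAXDESC values are only selected for pools with very large buffers
 * (roughly above 4 KB, 8 KB and 16 KB respectively), keeping
 * MAXDESC * SRRCTL.BSIZEPKT below the 64 KB limit quoted above.
 */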
4244
4245 /**
4246  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4247  * interrupt
4248  *
4249  * (Taken from FreeBSD tree)
4250  * (yes this is all very magic and confusing :)
4251  *
4252  * @dev port handle
4253  * @entry the register array entry
4254  * @vector the MSIX vector for this queue
4255  * @type RX/TX/MISC
4256  */
4257 static void
4258 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4259 {
4260         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4261         u32 ivar, index;
4262
4263         vector |= IXGBE_IVAR_ALLOC_VAL;
4264
4265         switch (hw->mac.type) {
4266
4267         case ixgbe_mac_82598EB:
4268                 if (type == -1)
4269                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4270                 else
4271                         entry += (type * 64);
4272                 index = (entry >> 2) & 0x1F;
4273                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4274                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4275                 ivar |= (vector << (8 * (entry & 0x3)));
4276                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4277                 break;
4278
4279         case ixgbe_mac_82599EB:
4280         case ixgbe_mac_X540:
4281                 if (type == -1) { /* MISC IVAR */
4282                         index = (entry & 1) * 8;
4283                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4284                         ivar &= ~(0xFF << index);
4285                         ivar |= (vector << index);
4286                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4287                 } else {        /* RX/TX IVARS */
4288                         index = (16 * (entry & 1)) + (8 * type);
4289                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4290                         ivar &= ~(0xFF << index);
4291                         ivar |= (vector << index);
4292                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4293                 }
4294
4295                 break;
4296
4297         default:
4298                 break;
4299         }
4300 }
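/*
 * Worked example (illustrative only): on 82599/X540 each 32-bit IVAR
 * register holds four one-byte entries. Mapping Rx queue 5 (entry = 5,
 * type = 0) to MSI-X vector 3 selects IVAR(5 >> 1) = IVAR(2) and bit
 * offset 16 * (5 & 1) + 8 * 0 = 16, so bits 23:16 of IVAR(2) are written
 * with 0x83 (vector 3 with IXGBE_IVAR_ALLOC_VAL, 0x80, set).
 */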
4301
4302 void __attribute__((cold))
4303 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4304 {
4305         uint16_t i, rx_using_sse;
4306         struct ixgbe_adapter *adapter =
4307                 (struct ixgbe_adapter *)dev->data->dev_private;
4308
4309         /*
4310          * In order to allow Vector Rx there are a few configuration
4311          * conditions to be met and Rx Bulk Allocation should be allowed.
4312          */
4313         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4314             !adapter->rx_bulk_alloc_allowed) {
4315                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4316                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4317                                     "not enabled",
4318                              dev->data->port_id);
4319
4320                 adapter->rx_vec_allowed = false;
4321         }
4322
4323         /*
4324          * Initialize the appropriate LRO callback.
4325          *
4326          * If all queues satisfy the bulk allocation preconditions
4327          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4328          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4329          */
4330         if (dev->data->lro) {
4331                 if (adapter->rx_bulk_alloc_allowed) {
4332                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4333                                            "allocation version");
4334                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4335                 } else {
4336                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4337                                            "allocation version");
4338                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4339                 }
4340         } else if (dev->data->scattered_rx) {
4341                 /*
4342                  * Set the non-LRO scattered callback: there are Vector and
4343                  * single allocation versions.
4344                  */
4345                 if (adapter->rx_vec_allowed) {
4346                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4347                                             "callback (port=%d).",
4348                                      dev->data->port_id);
4349
4350                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4351                 } else if (adapter->rx_bulk_alloc_allowed) {
4352                         PMD_INIT_LOG(DEBUG, "Using a Scattered Rx with bulk "
4353                                            "allocation callback (port=%d).",
4354                                      dev->data->port_id);
4355                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4356                 } else {
4357                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4358                                             "single allocation) "
4359                                             "Scattered Rx callback "
4360                                             "(port=%d).",
4361                                      dev->data->port_id);
4362
4363                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4364                 }
4365         /*
4366          * Below we set "simple" callbacks according to port/queues parameters.
4367          * If parameters allow we are going to choose between the following
4368          * callbacks:
4369          *    - Vector
4370          *    - Bulk Allocation
4371          *    - Single buffer allocation (the simplest one)
4372          */
4373         } else if (adapter->rx_vec_allowed) {
4374                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4375                                     "burst size is no less than %d (port=%d).",
4376                              RTE_IXGBE_DESCS_PER_LOOP,
4377                              dev->data->port_id);
4378
4379                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4380         } else if (adapter->rx_bulk_alloc_allowed) {
4381                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4382                                     "satisfied. Rx Burst Bulk Alloc function "
4383                                     "will be used on port=%d.",
4384                              dev->data->port_id);
4385
4386                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4387         } else {
4388                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4389                                     "satisfied, or Scattered Rx is requested "
4390                                     "(port=%d).",
4391                              dev->data->port_id);
4392
4393                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4394         }
4395
4396         /* Propagate information about RX function choice through all queues. */
4397
4398         rx_using_sse =
4399                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4400                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4401
4402         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4403                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4404
4405                 rxq->rx_using_sse = rx_using_sse;
4406         }
4407 }
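/*
 * Summary of the selection above, in order of precedence: LRO -> bulk or
 * single allocation LRO burst; scattered Rx -> vector, bulk allocation or
 * single allocation scattered burst; otherwise -> vector, bulk allocation
 * or single allocation simple burst. rx_using_sse is then recorded per
 * queue so later code can tell whether a vector receive path is active.
 */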
4408
4409 /**
4410  * ixgbe_set_rsc - configure RSC related port HW registers
4411  *
4412  * Configures the port's RSC related registers according to chapter 4.6.7.2
4413  * of the 82599 Spec (x540 configuration is virtually the same).
4414  *
4415  * @dev port handle
4416  *
4417  * Returns 0 in case of success or a non-zero error code
4418  */
4419 static int
4420 ixgbe_set_rsc(struct rte_eth_dev *dev)
4421 {
4422         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4423         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4424         struct rte_eth_dev_info dev_info = { 0 };
4425         bool rsc_capable = false;
4426         uint16_t i;
4427         uint32_t rdrxctl;
4428
4429         /* Sanity check */
4430         dev->dev_ops->dev_infos_get(dev, &dev_info);
4431         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4432                 rsc_capable = true;
4433
4434         if (!rsc_capable && rx_conf->enable_lro) {
4435                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4436                                    "support it");
4437                 return -EINVAL;
4438         }
4439
4440         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4441
4442         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4443                 /*
4444                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4445                  * RSC requires HW CRC stripping to be enabled. If the user
4446                  * requested both HW CRC stripping off and RSC on, return
4447                  * an error.
4448                  */
4449                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4450                                     "is disabled");
4451                 return -EINVAL;
4452         }
4453
4454         /* RFCTL configuration  */
4455         if (rsc_capable) {
4456                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4457
4458                 if (rx_conf->enable_lro)
4459                         /*
4460                          * Since NFS packet coalescing is not supported, clear
4461                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4462                          * enabled.
4463                          */
4464                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4465                                    IXGBE_RFCTL_NFSR_DIS);
4466                 else
4467                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4468
4469                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4470         }
4471
4472         /* If LRO hasn't been requested - we are done here. */
4473         if (!rx_conf->enable_lro)
4474                 return 0;
4475
4476         /* Set RDRXCTL.RSCACKC bit */
4477         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4478         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4479         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4480
4481         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4482         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4483                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4484                 uint32_t srrctl =
4485                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4486                 uint32_t rscctl =
4487                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4488                 uint32_t psrtype =
4489                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4490                 uint32_t eitr =
4491                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4492
4493                 /*
4494                  * ixgbe PMD doesn't support header-split at the moment.
4495                  *
4496                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4497                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER field
4498                  * should be configured even if header split is not
4499                  * enabled. We configure it to 128 bytes, following the
4500                  * recommendation in the spec.
4501                  */
4502                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4503                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4504                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4505
4506                 /*
4507                  * TODO: Consider setting the Receive Descriptor Minimum
4508                  * Threshold Size for the RSC case. This is not an obviously
4509                  * beneficial option, but one worth considering...
4510                  */
4511
4512                 rscctl |= IXGBE_RSCCTL_RSCEN;
4513                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4514                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4515
4516                 /*
4517                  * RSC: Set the ITR interval corresponding to 2K ints/s.
4518                  *
4519                  * Full-sized RSC aggregations for a 10Gb/s link will
4520                  * arrive at a rate of about 20K aggregations/s.
4521                  *
4522                  * A 2K ints/s rate causes only 10% of the aggregations
4523                  * to be closed due to interrupt timer expiration when
4524                  * streaming at wire speed.
4525                  *
4526                  * For a sparse streaming case this setting will yield
4527                  * at most 500us of latency for a single RSC aggregation.
4528                  */
4529                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4530                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4531
4532                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4533                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4534                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4535                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4536
4537                 /*
4538                  * RSC requires the mapping of the queue to the
4539                  * interrupt vector.
4540                  */
4541                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4542         }
4543
4544         dev->data->lro = 1;
4545
4546         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4547
4548         return 0;
4549 }
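/*
 * Illustrative sketch (not part of the driver): the rxmode flags checked by
 * ixgbe_set_rsc() would typically be set by the application before calling
 * rte_eth_dev_configure(), e.g.:
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = {
 *                     .enable_lro = 1,     // request RSC/LRO
 *                     .hw_strip_crc = 1,   // required by RSC, see the check above
 *             },
 *     };
 *     rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 */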
4550
4551 /*
4552  * Initializes Receive Unit.
4553  */
4554 int __attribute__((cold))
4555 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4556 {
4557         struct ixgbe_hw     *hw;
4558         struct ixgbe_rx_queue *rxq;
4559         uint64_t bus_addr;
4560         uint32_t rxctrl;
4561         uint32_t fctrl;
4562         uint32_t hlreg0;
4563         uint32_t maxfrs;
4564         uint32_t srrctl;
4565         uint32_t rdrxctl;
4566         uint32_t rxcsum;
4567         uint16_t buf_size;
4568         uint16_t i;
4569         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4570         int rc;
4571
4572         PMD_INIT_FUNC_TRACE();
4573         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4574
4575         /*
4576          * Make sure receives are disabled while setting
4577          * up the RX context (registers, descriptor rings, etc.).
4578          */
4579         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4580         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4581
4582         /* Enable receipt of broadcast frames */
4583         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4584         fctrl |= IXGBE_FCTRL_BAM;
4585         fctrl |= IXGBE_FCTRL_DPF;
4586         fctrl |= IXGBE_FCTRL_PMCF;
4587         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4588
4589         /*
4590          * Configure CRC stripping, if any.
4591          */
4592         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4593         if (rx_conf->hw_strip_crc)
4594                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4595         else
4596                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4597
4598         /*
4599          * Configure jumbo frame support, if any.
4600          */
4601         if (rx_conf->jumbo_frame == 1) {
4602                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4603                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4604                 maxfrs &= 0x0000FFFF;
4605                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4606                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4607         } else
4608                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4609
4610         /*
4611          * If loopback mode is configured for 82599, set LPBK bit.
4612          */
4613         if (hw->mac.type == ixgbe_mac_82599EB &&
4614                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4615                 hlreg0 |= IXGBE_HLREG0_LPBK;
4616         else
4617                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4618
4619         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4620
4621         /* Setup RX queues */
4622         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4623                 rxq = dev->data->rx_queues[i];
4624
4625                 /*
4626                  * Reset crc_len in case it was changed after queue setup by a
4627                  * call to configure.
4628                  */
4629                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4630
4631                 /* Setup the Base and Length of the Rx Descriptor Rings */
4632                 bus_addr = rxq->rx_ring_phys_addr;
4633                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4634                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4635                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4636                                 (uint32_t)(bus_addr >> 32));
4637                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4638                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4639                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4640                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4641
4642                 /* Configure the SRRCTL register */
4643 #ifdef RTE_HEADER_SPLIT_ENABLE
4644                 /*
4645                  * Configure Header Split
4646                  */
4647                 if (rx_conf->header_split) {
4648                         if (hw->mac.type == ixgbe_mac_82599EB) {
4649                                 /* Must setup the PSRTYPE register */
4650                                 uint32_t psrtype;
4651
4652                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4653                                         IXGBE_PSRTYPE_UDPHDR   |
4654                                         IXGBE_PSRTYPE_IPV4HDR  |
4655                                         IXGBE_PSRTYPE_IPV6HDR;
4656                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4657                         }
4658                         srrctl = ((rx_conf->split_hdr_size <<
4659                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4660                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4661                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4662                 } else
4663 #endif
4664                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4665
4666                 /* Set if packets are dropped when no descriptors available */
4667                 if (rxq->drop_en)
4668                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4669
4670                 /*
4671                  * Configure the RX buffer size in the BSIZEPACKET field of
4672                  * the SRRCTL register of the queue.
4673                  * The value is in 1 KB resolution. Valid values can be from
4674                  * 1 KB to 16 KB.
4675                  */
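                /*
                 * Worked example (illustrative only): a pool created with
                 * RTE_MBUF_DEFAULT_BUF_SIZE leaves 2048 bytes of packet
                 * buffer after the default 128-byte headroom, so BSIZEPACKET
                 * is set to 2 (2 KB). The scattered Rx check below then
                 * stays off for a standard 1518-byte max_rx_pkt_len, since
                 * 1518 + 2 * IXGBE_VLAN_TAG_SIZE fits within 2048 bytes.
                 */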
4676                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4677                         RTE_PKTMBUF_HEADROOM);
4678                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4679                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4680
4681                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4682
4683                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4684                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4685
4686                 /* Add double VLAN tag length to support dual VLAN (QinQ) frames */
4687                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4688                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4689                         dev->data->scattered_rx = 1;
4690         }
4691
4692         if (rx_conf->enable_scatter)
4693                 dev->data->scattered_rx = 1;
4694
4695         /*
4696          * Device configured with multiple RX queues.
4697          */
4698         ixgbe_dev_mq_rx_configure(dev);
4699
4700         /*
4701          * Setup the Checksum Register.
4702          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4703          * Enable IP/L4 checksum computation by hardware if requested to do so.
4704          */
4705         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4706         rxcsum |= IXGBE_RXCSUM_PCSD;
4707         if (rx_conf->hw_ip_checksum)
4708                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4709         else
4710                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4711
4712         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4713
4714         if (hw->mac.type == ixgbe_mac_82599EB ||
4715             hw->mac.type == ixgbe_mac_X540) {
4716                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4717                 if (rx_conf->hw_strip_crc)
4718                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4719                 else
4720                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4721                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4722                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4723         }
4724
4725         rc = ixgbe_set_rsc(dev);
4726         if (rc)
4727                 return rc;
4728
4729         ixgbe_set_rx_function(dev);
4730
4731         return 0;
4732 }
4733
4734 /*
4735  * Initializes Transmit Unit.
4736  */
4737 void __attribute__((cold))
4738 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4739 {
4740         struct ixgbe_hw     *hw;
4741         struct ixgbe_tx_queue *txq;
4742         uint64_t bus_addr;
4743         uint32_t hlreg0;
4744         uint32_t txctrl;
4745         uint16_t i;
4746
4747         PMD_INIT_FUNC_TRACE();
4748         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4749
4750         /* Enable TX CRC (checksum offload requirement) and hw padding
4751          * (TSO requirement)
4752          */
4753         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4754         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4755         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4756
4757         /* Setup the Base and Length of the Tx Descriptor Rings */
4758         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4759                 txq = dev->data->tx_queues[i];
4760
4761                 bus_addr = txq->tx_ring_phys_addr;
4762                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4763                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4764                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4765                                 (uint32_t)(bus_addr >> 32));
4766                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4767                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4768                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4769                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4770                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4771
4772                 /*
4773                  * Disable Tx Head Writeback RO bit, since this hoses
4774                  * bookkeeping if things aren't delivered in order.
4775                  */
4776                 switch (hw->mac.type) {
4777                 case ixgbe_mac_82598EB:
4778                         txctrl = IXGBE_READ_REG(hw,
4779                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4780                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4781                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4782                                         txctrl);
4783                         break;
4784
4785                 case ixgbe_mac_82599EB:
4786                 case ixgbe_mac_X540:
4787                 case ixgbe_mac_X550:
4788                 case ixgbe_mac_X550EM_x:
4789                 case ixgbe_mac_X550EM_a:
4790                 default:
4791                         txctrl = IXGBE_READ_REG(hw,
4792                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4793                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4794                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4795                                         txctrl);
4796                         break;
4797                 }
4798         }
4799
4800         /* Device configured with multiple TX queues. */
4801         ixgbe_dev_mq_tx_configure(dev);
4802 }
4803
4804 /*
4805  * Set up link for 82599 loopback mode Tx->Rx.
4806  */
4807 static inline void __attribute__((cold))
4808 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4809 {
4810         PMD_INIT_FUNC_TRACE();
4811
4812         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4813                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4814                                 IXGBE_SUCCESS) {
4815                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4816                         /* ignore error */
4817                         return;
4818                 }
4819         }
4820
4821         /* Restart link */
4822         IXGBE_WRITE_REG(hw,
4823                         IXGBE_AUTOC,
4824                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4825         ixgbe_reset_pipeline_82599(hw);
4826
4827         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4828         msec_delay(50);
4829 }
4830
4831
4832 /*
4833  * Start Transmit and Receive Units.
4834  */
4835 int __attribute__((cold))
4836 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4837 {
4838         struct ixgbe_hw     *hw;
4839         struct ixgbe_tx_queue *txq;
4840         struct ixgbe_rx_queue *rxq;
4841         uint32_t txdctl;
4842         uint32_t dmatxctl;
4843         uint32_t rxctrl;
4844         uint16_t i;
4845         int ret = 0;
4846
4847         PMD_INIT_FUNC_TRACE();
4848         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4849
4850         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4851                 txq = dev->data->tx_queues[i];
4852                 /* Setup Transmit Threshold Registers */
4853                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4854                 txdctl |= txq->pthresh & 0x7F;
4855                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4856                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4857                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4858         }
4859
4860         if (hw->mac.type != ixgbe_mac_82598EB) {
4861                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4862                 dmatxctl |= IXGBE_DMATXCTL_TE;
4863                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4864         }
4865
4866         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4867                 txq = dev->data->tx_queues[i];
4868                 if (!txq->tx_deferred_start) {
4869                         ret = ixgbe_dev_tx_queue_start(dev, i);
4870                         if (ret < 0)
4871                                 return ret;
4872                 }
4873         }
4874
4875         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4876                 rxq = dev->data->rx_queues[i];
4877                 if (!rxq->rx_deferred_start) {
4878                         ret = ixgbe_dev_rx_queue_start(dev, i);
4879                         if (ret < 0)
4880                                 return ret;
4881                 }
4882         }
4883
4884         /* Enable Receive engine */
4885         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4886         if (hw->mac.type == ixgbe_mac_82598EB)
4887                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4888         rxctrl |= IXGBE_RXCTRL_RXEN;
4889         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4890
4891         /* If loopback mode is enabled for 82599, set up the link accordingly */
4892         if (hw->mac.type == ixgbe_mac_82599EB &&
4893                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4894                 ixgbe_setup_loopback_link_82599(hw);
4895
4896         return 0;
4897 }
4898
4899 /*
4900  * Start Receive Units for the specified queue.
4901  */
4902 int __attribute__((cold))
4903 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4904 {
4905         struct ixgbe_hw     *hw;
4906         struct ixgbe_rx_queue *rxq;
4907         uint32_t rxdctl;
4908         int poll_ms;
4909
4910         PMD_INIT_FUNC_TRACE();
4911         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4912
4913         if (rx_queue_id < dev->data->nb_rx_queues) {
4914                 rxq = dev->data->rx_queues[rx_queue_id];
4915
4916                 /* Allocate buffers for descriptor rings */
4917                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4918                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4919                                      rx_queue_id);
4920                         return -1;
4921                 }
4922                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4923                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4924                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4925
4926                 /* Wait until RX Enable ready */
4927                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4928                 do {
4929                         rte_delay_ms(1);
4930                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4931                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4932                 if (!poll_ms)
4933                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4934                                      rx_queue_id);
4935                 rte_wmb();
4936                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4937                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4938                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4939         } else
4940                 return -1;
4941
4942         return 0;
4943 }
4944
4945 /*
4946  * Stop Receive Units for the specified queue.
4947  */
4948 int __attribute__((cold))
4949 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4950 {
4951         struct ixgbe_hw     *hw;
4952         struct ixgbe_adapter *adapter =
4953                 (struct ixgbe_adapter *)dev->data->dev_private;
4954         struct ixgbe_rx_queue *rxq;
4955         uint32_t rxdctl;
4956         int poll_ms;
4957
4958         PMD_INIT_FUNC_TRACE();
4959         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4960
4961         if (rx_queue_id < dev->data->nb_rx_queues) {
4962                 rxq = dev->data->rx_queues[rx_queue_id];
4963
4964                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4965                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4966                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4967
4968                 /* Wait until RX Enable bit clear */
4969                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4970                 do {
4971                         rte_delay_ms(1);
4972                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4973                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4974                 if (!poll_ms)
4975                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4976                                      rx_queue_id);
4977
4978                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4979
4980                 ixgbe_rx_queue_release_mbufs(rxq);
4981                 ixgbe_reset_rx_queue(adapter, rxq);
4982                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4983         } else
4984                 return -1;
4985
4986         return 0;
4987 }
4988
4989
4990 /*
4991  * Start Transmit Units for the specified queue.
4992  */
4993 int __attribute__((cold))
4994 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
4995 {
4996         struct ixgbe_hw     *hw;
4997         struct ixgbe_tx_queue *txq;
4998         uint32_t txdctl;
4999         int poll_ms;
5000
5001         PMD_INIT_FUNC_TRACE();
5002         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5003
5004         if (tx_queue_id < dev->data->nb_tx_queues) {
5005                 txq = dev->data->tx_queues[tx_queue_id];
5006                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5007                 txdctl |= IXGBE_TXDCTL_ENABLE;
5008                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5009
5010                 /* Wait until TX Enable ready */
5011                 if (hw->mac.type == ixgbe_mac_82599EB) {
5012                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5013                         do {
5014                                 rte_delay_ms(1);
5015                                 txdctl = IXGBE_READ_REG(hw,
5016                                         IXGBE_TXDCTL(txq->reg_idx));
5017                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5018                         if (!poll_ms)
5019                                 PMD_INIT_LOG(ERR, "Could not enable "
5020                                              "Tx Queue %d", tx_queue_id);
5021                 }
5022                 rte_wmb();
5023                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5024                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5025                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5026         } else
5027                 return -1;
5028
5029         return 0;
5030 }
5031
5032 /*
5033  * Stop Transmit Units for the specified queue.
5034  */
5035 int __attribute__((cold))
5036 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5037 {
5038         struct ixgbe_hw     *hw;
5039         struct ixgbe_tx_queue *txq;
5040         uint32_t txdctl;
5041         uint32_t txtdh, txtdt;
5042         int poll_ms;
5043
5044         PMD_INIT_FUNC_TRACE();
5045         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5046
5047         if (tx_queue_id >= dev->data->nb_tx_queues)
5048                 return -1;
5049
5050         txq = dev->data->tx_queues[tx_queue_id];
5051
5052         /* Wait until TX queue is empty */
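        /*
         * The queue is drained once the hardware head pointer (TDH) has
         * caught up with the software tail pointer (TDT).
         */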
5053         if (hw->mac.type == ixgbe_mac_82599EB) {
5054                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5055                 do {
5056                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5057                         txtdh = IXGBE_READ_REG(hw,
5058                                                IXGBE_TDH(txq->reg_idx));
5059                         txtdt = IXGBE_READ_REG(hw,
5060                                                IXGBE_TDT(txq->reg_idx));
5061                 } while (--poll_ms && (txtdh != txtdt));
5062                 if (!poll_ms)
5063                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5064                                      "when stopping.", tx_queue_id);
5065         }
5066
5067         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5068         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5069         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5070
5071         /* Wait until TX Enable bit clear */
5072         if (hw->mac.type == ixgbe_mac_82599EB) {
5073                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5074                 do {
5075                         rte_delay_ms(1);
5076                         txdctl = IXGBE_READ_REG(hw,
5077                                                 IXGBE_TXDCTL(txq->reg_idx));
5078                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5079                 if (!poll_ms)
5080                         PMD_INIT_LOG(ERR, "Could not disable "
5081                                      "Tx Queue %d", tx_queue_id);
5082         }
5083
5084         if (txq->ops != NULL) {
5085                 txq->ops->release_mbufs(txq);
5086                 txq->ops->reset(txq);
5087         }
5088         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5089
5090         return 0;
5091 }
5092
5093 void
5094 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5095         struct rte_eth_rxq_info *qinfo)
5096 {
5097         struct ixgbe_rx_queue *rxq;
5098
5099         rxq = dev->data->rx_queues[queue_id];
5100
5101         qinfo->mp = rxq->mb_pool;
5102         qinfo->scattered_rx = dev->data->scattered_rx;
5103         qinfo->nb_desc = rxq->nb_rx_desc;
5104
5105         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5106         qinfo->conf.rx_drop_en = rxq->drop_en;
5107         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5108 }
5109
5110 void
5111 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5112         struct rte_eth_txq_info *qinfo)
5113 {
5114         struct ixgbe_tx_queue *txq;
5115
5116         txq = dev->data->tx_queues[queue_id];
5117
5118         qinfo->nb_desc = txq->nb_tx_desc;
5119
5120         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5121         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5122         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5123
5124         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5125         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5126         qinfo->conf.txq_flags = txq->txq_flags;
5127         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5128 }
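
/*
 * Illustrative usage sketch (not part of the driver): the two info callbacks
 * above are reached through the generic queue-info API.  Port/queue ids and
 * the printed fields are assumptions made for the example only.
 */
static __rte_unused void
example_dump_queue_info(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_rxq_info rx_info;
        struct rte_eth_txq_info tx_info;

        if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_info) == 0)
                printf("rxq %u: %u descriptors, free_thresh %u\n",
                       (unsigned int)queue_id,
                       (unsigned int)rx_info.nb_desc,
                       (unsigned int)rx_info.conf.rx_free_thresh);

        if (rte_eth_tx_queue_info_get(port_id, queue_id, &tx_info) == 0)
                printf("txq %u: %u descriptors, rs_thresh %u\n",
                       (unsigned int)queue_id,
                       (unsigned int)tx_info.nb_desc,
                       (unsigned int)tx_info.conf.tx_rs_thresh);
}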
5129
5130 /*
5131  * [VF] Initializes Receive Unit.
5132  */
5133 int __attribute__((cold))
5134 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5135 {
5136         struct ixgbe_hw     *hw;
5137         struct ixgbe_rx_queue *rxq;
5138         uint64_t bus_addr;
5139         uint32_t srrctl, psrtype = 0;
5140         uint16_t buf_size;
5141         uint16_t i;
5142         int ret;
5143
5144         PMD_INIT_FUNC_TRACE();
5145         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5146
5147         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5148                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5149                         "it must be a power of 2");
5150                 return -1;
5151         }
5152
5153         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5154                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5155                         "it must be less than or equal to %d",
5156                         hw->mac.max_rx_queues);
5157                 return -1;
5158         }
5159
5160         /*
5161          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5162          * disables VF packet reception if the PF MTU is > 1500.
5163          * This is done to cope with an 82599 limitation that forces
5164          * the PF and all VFs to share the same MTU.
5165          * The PF driver re-enables VF packet reception only when the VF
5166          * driver issues an IXGBE_VF_SET_LPE request.
5167          * In the meantime, the VF device cannot be used, even if the VF driver
5168          * and the guest VM network stack are ready to accept packets with a
5169          * size up to the PF MTU.
5170          * As a workaround for this PF behaviour, force the call to
5171          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5172          * VF packet reception works in all cases.
5173          */
5174         ixgbevf_rlpml_set_vf(hw,
5175                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
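        /*
         * For example, even with the default max_rx_pkt_len of ETHER_MAX_LEN
         * (1518 bytes), the IXGBE_VF_SET_LPE mailbox message is still sent
         * here, so the PF re-enables VF reception regardless of its own MTU.
         */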
5176
5177         /* Setup RX queues */
5178         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5179                 rxq = dev->data->rx_queues[i];
5180
5181                 /* Allocate buffers for descriptor rings */
5182                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5183                 if (ret)
5184                         return ret;
5185
5186                 /* Setup the Base and Length of the Rx Descriptor Rings */
5187                 bus_addr = rxq->rx_ring_phys_addr;
5188
5189                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5190                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5191                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5192                                 (uint32_t)(bus_addr >> 32));
5193                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5194                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5195                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5196                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5197
5198
5199                 /* Configure the SRRCTL register */
5200 #ifdef RTE_HEADER_SPLIT_ENABLE
5201                 /*
5202                  * Configure Header Split
5203                  */
5204                 if (dev->data->dev_conf.rxmode.header_split) {
5205                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5206                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5207                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5208                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5209                 } else
5210 #endif
5211                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5212
5213                 /* Set the drop-enable bit so packets are dropped when no descriptors are available */
5214                 if (rxq->drop_en)
5215                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5216
5217                 /*
5218                  * Configure the RX buffer size in the BSIZEPACKET field of
5219                  * the SRRCTL register of the queue.
5220                  * The value is in 1 KB resolution. Valid values can be from
5221                  * 1 KB to 16 KB.
5222                  */
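                /*
                 * Worked example, assuming a pool created with the default
                 * RTE_MBUF_DEFAULT_BUF_SIZE (2176): 2176 - 128 bytes of
                 * headroom = 2048, and 2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT
                 * (10) = 2, i.e. a 2 KB receive buffer per descriptor.
                 */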
5223                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5224                         RTE_PKTMBUF_HEADROOM);
5225                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5226                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5227
5228                 /*
5229                  * Write the per-queue SRRCTL value through the VF register (VFSRRCTL)
5230                  */
5231                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5232
5233                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5234                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5235
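                /*
                 * Worked example: with max_rx_pkt_len = 9000 and a 2048 byte
                 * buffer, 9000 + 2 * IXGBE_VLAN_TAG_SIZE (2 * 4) = 9008
                 * exceeds 2048, so scattered Rx is forced below; a 1500 byte
                 * limit (1508 <= 2048) would leave scatter mode untouched.
                 */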
5236                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5237                     /* Account for two VLAN tags (double VLAN / QinQ) */
5238                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5239                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5240                         if (!dev->data->scattered_rx)
5241                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5242                         dev->data->scattered_rx = 1;
5243                 }
5244         }
5245
5246 #ifdef RTE_HEADER_SPLIT_ENABLE
5247         if (dev->data->dev_conf.rxmode.header_split)
5248                 /* Must setup the PSRTYPE register */
5249                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5250                         IXGBE_PSRTYPE_UDPHDR   |
5251                         IXGBE_PSRTYPE_IPV4HDR  |
5252                         IXGBE_PSRTYPE_IPV6HDR;
5253 #endif
5254
5255         /* Set RQPL for VF RSS according to max Rx queue */
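        /*
         * For example, with 4 VF Rx queues (a power of two, checked above),
         * 4 >> 1 = 2 is programmed into the RQPL field of VFPSRTYPE.
         */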
5256         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5257                 IXGBE_PSRTYPE_RQPL_SHIFT;
5258         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5259
5260         ixgbe_set_rx_function(dev);
5261
5262         return 0;
5263 }
5264
5265 /*
5266  * [VF] Initializes Transmit Unit.
5267  */
5268 void __attribute__((cold))
5269 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5270 {
5271         struct ixgbe_hw     *hw;
5272         struct ixgbe_tx_queue *txq;
5273         uint64_t bus_addr;
5274         uint32_t txctrl;
5275         uint16_t i;
5276
5277         PMD_INIT_FUNC_TRACE();
5278         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5279
5280         /* Setup the Base and Length of the Tx Descriptor Rings */
5281         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5282                 txq = dev->data->tx_queues[i];
5283                 bus_addr = txq->tx_ring_phys_addr;
5284                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5285                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5286                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5287                                 (uint32_t)(bus_addr >> 32));
5288                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5289                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
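                /*
                 * For example, a ring of 512 descriptors is programmed as
                 * 512 * sizeof(union ixgbe_adv_tx_desc) = 512 * 16 = 8192,
                 * since VFTDLEN is expressed in bytes.
                 */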
5290                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5291                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5292                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5293
5294                 /*
5295                  * Disable the Tx Head Writeback relaxed-ordering (RO) bit; it
5296                  * breaks the driver's bookkeeping when write-backs arrive out of order.
5297                  */
5298                 txctrl = IXGBE_READ_REG(hw,
5299                                 IXGBE_VFDCA_TXCTRL(i));
5300                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5301                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5302                                 txctrl);
5303         }
5304 }
5305
5306 /*
5307  * [VF] Start Transmit and Receive Units.
5308  */
5309 void __attribute__((cold))
5310 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5311 {
5312         struct ixgbe_hw     *hw;
5313         struct ixgbe_tx_queue *txq;
5314         struct ixgbe_rx_queue *rxq;
5315         uint32_t txdctl;
5316         uint32_t rxdctl;
5317         uint16_t i;
5318         int poll_ms;
5319
5320         PMD_INIT_FUNC_TRACE();
5321         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5322
5323         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5324                 txq = dev->data->tx_queues[i];
5325                 /* Setup Transmit Threshold Registers */
5326                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5327                 txdctl |= txq->pthresh & 0x7F;
5328                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5329                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5330                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5331         }
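        /*
         * Worked example: pthresh = 32, hthresh = 1, wthresh = 0 packs into
         * txdctl = 32 | (1 << 8) | (0 << 16) = 0x120; the enable bit is set
         * in the next loop.
         */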
5332
5333         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5334
5335                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5336                 txdctl |= IXGBE_TXDCTL_ENABLE;
5337                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5338
5339                 poll_ms = 10;
5340                 /* Wait until TX Enable ready */
5341                 do {
5342                         rte_delay_ms(1);
5343                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5344                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5345                 if (!poll_ms)
5346                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5347         }
5348         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5349
5350                 rxq = dev->data->rx_queues[i];
5351
5352                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5353                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5354                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5355
5356                 /* Wait until RX Enable ready */
5357                 poll_ms = 10;
5358                 do {
5359                         rte_delay_ms(1);
5360                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5361                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5362                 if (!poll_ms)
5363                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5364                 rte_wmb();
5365                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5366
5367         }
5368 }
5369
5370 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5371 int __attribute__((weak))
5372 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5373 {
5374         return -1;
5375 }
5376
5377 uint16_t __attribute__((weak))
5378 ixgbe_recv_pkts_vec(
5379         void __rte_unused *rx_queue,
5380         struct rte_mbuf __rte_unused **rx_pkts,
5381         uint16_t __rte_unused nb_pkts)
5382 {
5383         return 0;
5384 }
5385
5386 uint16_t __attribute__((weak))
5387 ixgbe_recv_scattered_pkts_vec(
5388         void __rte_unused *rx_queue,
5389         struct rte_mbuf __rte_unused **rx_pkts,
5390         uint16_t __rte_unused nb_pkts)
5391 {
5392         return 0;
5393 }
5394
5395 int __attribute__((weak))
5396 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5397 {
5398         return -1;
5399 }
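
/*
 * Illustrative sketch (not part of the driver): how weak stubs such as the
 * ones above are overridden at link time.  When the vector Rx/Tx code is
 * compiled in, it provides strong definitions with the same names and the
 * linker picks those instead of the stubs.  The two definitions below would
 * live in two different source files; the names are hypothetical and for
 * illustration only.
 */
#if 0   /* example only; the two definitions belong in separate files */
/* file_a.c: weak fallback, used when the optimized object is not linked in */
int __attribute__((weak))
example_do_work(void)
{
        return -1;
}

/* file_b.c: strong definition that the linker prefers over the weak stub */
int
example_do_work(void)
{
        return 0;
}
#endif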