drivers/net/ixgbe/ixgbe_rxtx.c (dpdk.git, commit 950243237e959223c7414268e9bedfe0a216645a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask of the offload flags that require building a TX context descriptor */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
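/*
 * For reference: IXGBE_TX_OFFLOAD_MASK is the set of mbuf ol_flags this
 * driver can act on, and IXGBE_TX_OFFLOAD_NOTSUP_MASK is its complement
 * within PKT_TX_OFFLOAD_MASK.  ixgbe_prep_pkts() below uses the latter to
 * reject packets that request an offload the driver does not support,
 * roughly:
 *
 *     if (m->ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK)
 *             rte_errno = ENOTSUP;    / * packet is rejected * /
 */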
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i, nb_free = 0;
130         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
131
132         /* check DD bit on threshold descriptor */
133         status = txq->tx_ring[txq->tx_next_dd].wb.status;
134         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
135                 return 0;
136
137         /*
138          * first buffer to free from S/W ring is at index
139          * tx_next_dd - (tx_rs_thresh-1)
140          */
141         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142
143         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
144                 /* free buffers one at a time */
145                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
146                 txep->mbuf = NULL;
147
148                 if (unlikely(m == NULL))
149                         continue;
150
151                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
152                     (nb_free > 0 && m->pool != free[0]->pool)) {
153                         rte_mempool_put_bulk(free[0]->pool,
154                                              (void **)free, nb_free);
155                         nb_free = 0;
156                 }
157
158                 free[nb_free++] = m;
159         }
160
161         if (nb_free > 0)
162                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
163
164         /* buffers were freed, update counters */
165         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
166         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
167         if (txq->tx_next_dd >= txq->nb_tx_desc)
168                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
169
170         return txq->tx_rs_thresh;
171 }
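/*
 * Worked example (illustrative values, assuming the queue was reset with
 * tx_next_dd = tx_rs_thresh - 1): with nb_tx_desc = 512 and tx_rs_thresh = 32,
 * the first successful call above frees the 32 mbufs at sw_ring[0..31] and
 * advances tx_next_dd from 31 to 63; the next call frees [32..63], and so on,
 * wrapping tx_next_dd back to 31 once descriptor 511 has been processed.
 */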
172
173 /* Populate 4 descriptors with data from 4 mbufs */
174 static inline void
175 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
176 {
177         uint64_t buf_dma_addr;
178         uint32_t pkt_len;
179         int i;
180
181         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
182                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
183                 pkt_len = (*pkts)->data_len;
184
185                 /* write data to descriptor */
186                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187
188                 txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190
191                 txdp->read.olinfo_status =
192                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
193
194                 rte_prefetch0(&(*pkts)->pool);
195         }
196 }
197
198 /* Populate 1 descriptor with data from 1 mbuf */
199 static inline void
200 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
201 {
202         uint64_t buf_dma_addr;
203         uint32_t pkt_len;
204
205         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
206         pkt_len = (*pkts)->data_len;
207
208         /* write data to descriptor */
209         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
210         txdp->read.cmd_type_len =
211                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
212         txdp->read.olinfo_status =
213                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
214         rte_prefetch0(&(*pkts)->pool);
215 }
216
217 /*
218  * Fill H/W descriptor ring with mbuf data.
219  * Copy mbuf pointers to the S/W ring.
220  */
221 static inline void
222 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
223                       uint16_t nb_pkts)
224 {
225         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
226         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
227         const int N_PER_LOOP = 4;
228         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
229         int mainpart, leftover;
230         int i, j;
231
232         /*
233          * Process most of the packets in chunks of N pkts.  Any
234          * leftover packets will get processed one at a time.
235          */
236         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
237         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
238         for (i = 0; i < mainpart; i += N_PER_LOOP) {
239                 /* Copy N mbuf pointers to the S/W ring */
240                 for (j = 0; j < N_PER_LOOP; ++j) {
241                         (txep + i + j)->mbuf = *(pkts + i + j);
242                 }
243                 tx4(txdp + i, pkts + i);
244         }
245
246         if (unlikely(leftover > 0)) {
247                 for (i = 0; i < leftover; ++i) {
248                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
249                         tx1(txdp + mainpart + i, pkts + mainpart + i);
250                 }
251         }
252 }
253
254 static inline uint16_t
255 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
256              uint16_t nb_pkts)
257 {
258         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
259         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
260         uint16_t n = 0;
261
262         /*
263          * Begin scanning the H/W ring for done descriptors when the
264          * number of available descriptors drops below tx_free_thresh.  For
265          * each done descriptor, free the associated buffer.
266          */
267         if (txq->nb_tx_free < txq->tx_free_thresh)
268                 ixgbe_tx_free_bufs(txq);
269
270         /* Only use descriptors that are available */
271         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
272         if (unlikely(nb_pkts == 0))
273                 return 0;
274
275         /* Use exactly nb_pkts descriptors */
276         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
277
278         /*
279          * At this point, we know there are enough descriptors in the
280          * ring to transmit all the packets.  This assumes that each
281          * mbuf contains a single segment, and that no new offloads
282          * are expected, which would require a new context descriptor.
283          */
284
285         /*
286          * See if we're going to wrap-around. If so, handle the top
287          * of the descriptor ring first, then do the bottom.  If not,
288          * the processing looks just like the "bottom" part anyway...
289          */
290         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
291                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
292                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
293
294                 /*
295                  * We know that the last descriptor in the ring will need to
296                  * have its RS bit set because tx_rs_thresh has to be
297                  * a divisor of the ring size
298                  */
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302
303                 txq->tx_tail = 0;
304         }
305
306         /* Fill H/W descriptor ring with mbuf data */
307         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
308         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
309
310         /*
311          * Determine if RS bit should be set
312          * This is what we actually want:
313          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
314          * but instead of subtracting 1 and doing >=, we can just do
315          * greater than without subtracting.
316          */
317         if (txq->tx_tail > txq->tx_next_rs) {
318                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
319                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
320                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
321                                                 txq->tx_rs_thresh);
322                 if (txq->tx_next_rs >= txq->nb_tx_desc)
323                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
324         }
325
326         /*
327          * Check for wrap-around. This would only happen if we used
328          * up to the last descriptor in the ring, no more, no less.
329          */
330         if (txq->tx_tail >= txq->nb_tx_desc)
331                 txq->tx_tail = 0;
332
333         /* update tail pointer */
334         rte_wmb();
335         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
336
337         return nb_pkts;
338 }
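/*
 * Worked example of the wrap-around handling above (illustrative numbers):
 * with nb_tx_desc = 512, tx_tail = 510 and nb_pkts = 4, the first
 * ixgbe_tx_fill_hw_ring() call writes n = 2 descriptors (510 and 511), the
 * RS bit is set at the pending tx_next_rs position and tx_next_rs/tx_tail
 * are reset for the new lap; the second call then writes the remaining two
 * descriptors (0 and 1), leaving tx_tail = 2.
 */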
339
340 uint16_t
341 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
342                        uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345
346         /* If the burst fits within TX_MAX_BURST pkts, transmit it in one go */
347         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
348                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
349
350         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
351         nb_tx = 0;
352         while (nb_pkts) {
353                 uint16_t ret, n;
354
355                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
356                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
357                 nb_tx = (uint16_t)(nb_tx + ret);
358                 nb_pkts = (uint16_t)(nb_pkts - ret);
359                 if (ret < n)
360                         break;
361         }
362
363         return nb_tx;
364 }
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & PKT_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation offload (TSO) is required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 case PKT_TX_SCTP_CKSUM:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
434                         tx_offload_mask.l2_len |= ~0;
435                         tx_offload_mask.l3_len |= ~0;
436                         break;
437                 default:
438                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
439                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
440                         break;
441                 }
442         }
443
444         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
445                 tx_offload_mask.outer_l2_len |= ~0;
446                 tx_offload_mask.outer_l3_len |= ~0;
447                 tx_offload_mask.l2_len |= ~0;
448                 seqnum_seed |= tx_offload.outer_l3_len
449                                << IXGBE_ADVTXD_OUTER_IPLEN;
450                 seqnum_seed |= tx_offload.l2_len
451                                << IXGBE_ADVTXD_TUNNEL_LEN;
452         }
453
454         txq->ctx_cache[ctx_idx].flags = ol_flags;
455         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
456                 tx_offload_mask.data[0] & tx_offload.data[0];
457         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
458                 tx_offload_mask.data[1] & tx_offload.data[1];
459         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
460
461         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
462         vlan_macip_lens = tx_offload.l3_len;
463         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
464                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
465                                     IXGBE_ADVTXD_MACLEN_SHIFT);
466         else
467                 vlan_macip_lens |= (tx_offload.l2_len <<
468                                     IXGBE_ADVTXD_MACLEN_SHIFT);
469         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
470         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
471         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
472         ctx_txd->seqnum_seed     = seqnum_seed;
473 }
474
475 /*
476  * Check which hardware context can be used. Use the existing match
477  * or create a new context descriptor.
478  */
479 static inline uint32_t
480 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
481                    union ixgbe_tx_offload tx_offload)
482 {
483         /* Check whether the currently-used context matches */
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* Otherwise check whether the other (next) context matches */
494         txq->ctx_curr ^= 1;
495         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
496                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
497                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
498                      & tx_offload.data[0])) &&
499                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
500                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
501                      & tx_offload.data[1]))))
502                 return txq->ctx_curr;
503
504         /* Neither context matches: a new context descriptor must be built */
505         return IXGBE_CTX_NUM;
506 }
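/*
 * Note on the lookup above: each queue caches IXGBE_CTX_NUM (two) hardware
 * context slots.  The currently-used slot is checked first, then ctx_curr is
 * toggled and the other slot is checked.  Returning IXGBE_CTX_NUM tells the
 * caller that neither slot matches and a new context descriptor must be
 * programmed into the slot that ctx_curr now points to.
 */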
507
508 static inline uint32_t
509 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
510 {
511         uint32_t tmp = 0;
512
513         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
514                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
515         if (ol_flags & PKT_TX_IP_CKSUM)
516                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
517         if (ol_flags & PKT_TX_TCP_SEG)
518                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
519         return tmp;
520 }
521
522 static inline uint32_t
523 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
524 {
525         uint32_t cmdtype = 0;
526
527         if (ol_flags & PKT_TX_VLAN_PKT)
528                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
529         if (ol_flags & PKT_TX_TCP_SEG)
530                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
531         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
532                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
533         if (ol_flags & PKT_TX_MACSEC)
534                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
535         return cmdtype;
536 }
537
538 /* Default RS bit threshold values */
539 #ifndef DEFAULT_TX_RS_THRESH
540 #define DEFAULT_TX_RS_THRESH   32
541 #endif
542 #ifndef DEFAULT_TX_FREE_THRESH
543 #define DEFAULT_TX_FREE_THRESH 32
544 #endif
545
546 /* Reset transmit descriptors after they have been used */
547 static inline int
548 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
549 {
550         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
551         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
552         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
553         uint16_t nb_tx_desc = txq->nb_tx_desc;
554         uint16_t desc_to_clean_to;
555         uint16_t nb_tx_to_clean;
556         uint32_t status;
557
558         /* Determine the last descriptor needing to be cleaned */
559         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
560         if (desc_to_clean_to >= nb_tx_desc)
561                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
562
563         /* Check to make sure the last descriptor to clean is done */
564         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
565         status = txr[desc_to_clean_to].wb.status;
566         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
567                 PMD_TX_FREE_LOG(DEBUG,
568                                 "TX descriptor %4u is not done "
569                                 "(port=%d queue=%d)",
570                                 desc_to_clean_to,
571                                 txq->port_id, txq->queue_id);
572                 /* Failed to clean any descriptors, better luck next time */
573                 return -(1);
574         }
575
576         /* Figure out how many descriptors will be cleaned */
577         if (last_desc_cleaned > desc_to_clean_to)
578                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
579                                                         desc_to_clean_to);
580         else
581                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
582                                                 last_desc_cleaned);
583
584         PMD_TX_FREE_LOG(DEBUG,
585                         "Cleaning %4u TX descriptors: %4u to %4u "
586                         "(port=%d queue=%d)",
587                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
588                         txq->port_id, txq->queue_id);
589
590         /*
591          * The last descriptor to clean is done, so that means all the
592          * descriptors from the last descriptor that was cleaned
593          * up to the last descriptor with the RS bit set
594          * are done. Only reset the threshold descriptor.
595          */
596         txr[desc_to_clean_to].wb.status = 0;
597
598         /* Update the txq to reflect the last descriptor that was cleaned */
599         txq->last_desc_cleaned = desc_to_clean_to;
600         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
601
602         /* No Error */
603         return 0;
604 }
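/*
 * Worked example (illustrative numbers): with nb_tx_desc = 512,
 * tx_rs_thresh = 32 and last_desc_cleaned = 500, desc_to_clean_to becomes
 * 500 + 32 - 512 = 20 (subject to the sw_ring last_id adjustment).  If that
 * descriptor reports DD, nb_tx_to_clean = (512 - 500) + 20 = 32 and
 * nb_tx_free grows by 32 descriptors.
 */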
605
606 uint16_t
607 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
608                 uint16_t nb_pkts)
609 {
610         struct ixgbe_tx_queue *txq;
611         struct ixgbe_tx_entry *sw_ring;
612         struct ixgbe_tx_entry *txe, *txn;
613         volatile union ixgbe_adv_tx_desc *txr;
614         volatile union ixgbe_adv_tx_desc *txd, *txp;
615         struct rte_mbuf     *tx_pkt;
616         struct rte_mbuf     *m_seg;
617         uint64_t buf_dma_addr;
618         uint32_t olinfo_status;
619         uint32_t cmd_type_len;
620         uint32_t pkt_len;
621         uint16_t slen;
622         uint64_t ol_flags;
623         uint16_t tx_id;
624         uint16_t tx_last;
625         uint16_t nb_tx;
626         uint16_t nb_used;
627         uint64_t tx_ol_req;
628         uint32_t ctx = 0;
629         uint32_t new_ctx;
630         union ixgbe_tx_offload tx_offload;
631
632         tx_offload.data[0] = 0;
633         tx_offload.data[1] = 0;
634         txq = tx_queue;
635         sw_ring = txq->sw_ring;
636         txr     = txq->tx_ring;
637         tx_id   = txq->tx_tail;
638         txe = &sw_ring[tx_id];
639         txp = NULL;
640
641         /* Determine if the descriptor ring needs to be cleaned. */
642         if (txq->nb_tx_free < txq->tx_free_thresh)
643                 ixgbe_xmit_cleanup(txq);
644
645         rte_prefetch0(&txe->mbuf->pool);
646
647         /* TX loop */
648         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
649                 new_ctx = 0;
650                 tx_pkt = *tx_pkts++;
651                 pkt_len = tx_pkt->pkt_len;
652
653                 /*
654                  * Determine how many (if any) context descriptors
655                  * are needed for offload functionality.
656                  */
657                 ol_flags = tx_pkt->ol_flags;
658
659                 /* If hardware offload required */
660                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
661                 if (tx_ol_req) {
662                         tx_offload.l2_len = tx_pkt->l2_len;
663                         tx_offload.l3_len = tx_pkt->l3_len;
664                         tx_offload.l4_len = tx_pkt->l4_len;
665                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
666                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
667                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
668                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
669
670                         /* Decide whether to build a new context or reuse an existing one. */
671                         ctx = what_advctx_update(txq, tx_ol_req,
672                                 tx_offload);
673                         /* Only allocate a context descriptor if required */
674                         new_ctx = (ctx == IXGBE_CTX_NUM);
675                         ctx = txq->ctx_curr;
676                 }
677
678                 /*
679                  * Keep track of how many descriptors are used in this loop:
680                  * always the number of packet segments plus the number of
681                  * context descriptors (0 or 1) needed to transmit the packet.
682                  */
683                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
684
685                 if (txp != NULL &&
686                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
687                         /* set RS on the previous packet in the burst */
688                         txp->read.cmd_type_len |=
689                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
690
691                 /*
692                  * The number of descriptors that must be allocated for a
693                  * packet is the number of segments of that packet, plus 1
694                  * Context Descriptor for the hardware offload, if any.
695                  * Determine the last TX descriptor to allocate in the TX ring
696                  * for the packet, starting from the current position (tx_id)
697                  * in the ring.
698                  */
699                 tx_last = (uint16_t) (tx_id + nb_used - 1);
700
701                 /* Circular ring */
702                 if (tx_last >= txq->nb_tx_desc)
703                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
704
705                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
706                            " tx_first=%u tx_last=%u",
707                            (unsigned) txq->port_id,
708                            (unsigned) txq->queue_id,
709                            (unsigned) pkt_len,
710                            (unsigned) tx_id,
711                            (unsigned) tx_last);
712
713                 /*
714                  * Make sure there are enough TX descriptors available to
715                  * transmit the entire packet.
716                  * nb_used better be less than or equal to txq->tx_rs_thresh
717                  */
718                 if (nb_used > txq->nb_tx_free) {
719                         PMD_TX_FREE_LOG(DEBUG,
720                                         "Not enough free TX descriptors "
721                                         "nb_used=%4u nb_free=%4u "
722                                         "(port=%d queue=%d)",
723                                         nb_used, txq->nb_tx_free,
724                                         txq->port_id, txq->queue_id);
725
726                         if (ixgbe_xmit_cleanup(txq) != 0) {
727                                 /* Could not clean any descriptors */
728                                 if (nb_tx == 0)
729                                         return 0;
730                                 goto end_of_tx;
731                         }
732
733                         /* nb_used better be <= txq->tx_rs_thresh */
734                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
735                                 PMD_TX_FREE_LOG(DEBUG,
736                                         "The number of descriptors needed to "
737                                         "transmit the packet exceeds the "
738                                         "RS bit threshold. This will impact "
739                                         "performance. "
740                                         "nb_used=%4u nb_free=%4u "
741                                         "tx_rs_thresh=%4u. "
742                                         "(port=%d queue=%d)",
743                                         nb_used, txq->nb_tx_free,
744                                         txq->tx_rs_thresh,
745                                         txq->port_id, txq->queue_id);
746                                 /*
747                                  * Loop here until there are enough TX
748                                  * descriptors or until the ring cannot be
749                                  * cleaned.
750                                  */
751                                 while (nb_used > txq->nb_tx_free) {
752                                         if (ixgbe_xmit_cleanup(txq) != 0) {
753                                                 /*
754                                                  * Could not clean any
755                                                  * descriptors
756                                                  */
757                                                 if (nb_tx == 0)
758                                                         return 0;
759                                                 goto end_of_tx;
760                                         }
761                                 }
762                         }
763                 }
764
765                 /*
766                  * By now there are enough free TX descriptors to transmit
767                  * the packet.
768                  */
769
770                 /*
771                  * Set common flags of all TX Data Descriptors.
772                  *
773                  * The following bits must be set in all Data Descriptors:
774                  *   - IXGBE_ADVTXD_DTYP_DATA
775                  *   - IXGBE_ADVTXD_DCMD_DEXT
776                  *
777                  * The following bits must be set in the first Data Descriptor
778                  * and are ignored in the other ones:
779                  *   - IXGBE_ADVTXD_DCMD_IFCS
780                  *   - IXGBE_ADVTXD_MAC_1588
781                  *   - IXGBE_ADVTXD_DCMD_VLE
782                  *
783                  * The following bits must only be set in the last Data
784                  * Descriptor:
785                  *   - IXGBE_TXD_CMD_EOP
786                  *
787                  * The following bits can be set in any Data Descriptor, but
788                  * are only set in the last Data Descriptor:
789                  *   - IXGBE_TXD_CMD_RS
790                  */
791                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
792                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
793
794 #ifdef RTE_LIBRTE_IEEE1588
795                 if (ol_flags & PKT_TX_IEEE1588_TMST)
796                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
797 #endif
798
799                 olinfo_status = 0;
800                 if (tx_ol_req) {
801
802                         if (ol_flags & PKT_TX_TCP_SEG) {
803                                 /* when TSO is on, the paylen in the descriptor
804                                  * is not the packet len but the TCP payload len */
805                                 pkt_len -= (tx_offload.l2_len +
806                                         tx_offload.l3_len + tx_offload.l4_len);
807                         }
808
809                         /*
810                          * Setup the TX Advanced Context Descriptor if required
811                          */
812                         if (new_ctx) {
813                                 volatile struct ixgbe_adv_tx_context_desc *
814                                     ctx_txd;
815
816                                 ctx_txd = (volatile struct
817                                     ixgbe_adv_tx_context_desc *)
818                                     &txr[tx_id];
819
820                                 txn = &sw_ring[txe->next_id];
821                                 rte_prefetch0(&txn->mbuf->pool);
822
823                                 if (txe->mbuf != NULL) {
824                                         rte_pktmbuf_free_seg(txe->mbuf);
825                                         txe->mbuf = NULL;
826                                 }
827
828                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
829                                         tx_offload);
830
831                                 txe->last_id = tx_last;
832                                 tx_id = txe->next_id;
833                                 txe = txn;
834                         }
835
836                         /*
837                          * Set up the TX Advanced Data Descriptor.
838                          * This path is taken whether the context
839                          * descriptor was newly built or reused.
840                          */
841                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
842                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
843                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
844                 }
845
846                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
847
848                 m_seg = tx_pkt;
849                 do {
850                         txd = &txr[tx_id];
851                         txn = &sw_ring[txe->next_id];
852                         rte_prefetch0(&txn->mbuf->pool);
853
854                         if (txe->mbuf != NULL)
855                                 rte_pktmbuf_free_seg(txe->mbuf);
856                         txe->mbuf = m_seg;
857
858                         /*
859                          * Set up Transmit Data Descriptor.
860                          */
861                         slen = m_seg->data_len;
862                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
863                         txd->read.buffer_addr =
864                                 rte_cpu_to_le_64(buf_dma_addr);
865                         txd->read.cmd_type_len =
866                                 rte_cpu_to_le_32(cmd_type_len | slen);
867                         txd->read.olinfo_status =
868                                 rte_cpu_to_le_32(olinfo_status);
869                         txe->last_id = tx_last;
870                         tx_id = txe->next_id;
871                         txe = txn;
872                         m_seg = m_seg->next;
873                 } while (m_seg != NULL);
874
875                 /*
876                  * The last packet data descriptor needs End Of Packet (EOP)
877                  */
878                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
879                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
880                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
881
882                 /* Set RS bit only on threshold packets' last descriptor */
883                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
884                         PMD_TX_FREE_LOG(DEBUG,
885                                         "Setting RS bit on TXD id="
886                                         "%4u (port=%d queue=%d)",
887                                         tx_last, txq->port_id, txq->queue_id);
888
889                         cmd_type_len |= IXGBE_TXD_CMD_RS;
890
891                         /* Update txq RS bit counters */
892                         txq->nb_tx_used = 0;
893                         txp = NULL;
894                 } else
895                         txp = txd;
896
897                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
898         }
899
900 end_of_tx:
901         /* set RS on last packet in the burst */
902         if (txp != NULL)
903                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
904
905         rte_wmb();
906
907         /*
908          * Set the Transmit Descriptor Tail (TDT)
909          */
910         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
911                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
912                    (unsigned) tx_id, (unsigned) nb_tx);
913         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
914         txq->tx_tail = tx_id;
915
916         return nb_tx;
917 }
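/*
 * For reference, descriptor accounting in the full-featured path above:
 * each packet consumes one data descriptor per mbuf segment plus an optional
 * context descriptor when its offload flags/lengths match neither cached
 * context.  The RS bit is requested roughly every tx_rs_thresh descriptors
 * and on the last packet of the burst, which is what later lets
 * ixgbe_xmit_cleanup() recycle descriptors in blocks.
 */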
918
919 /*********************************************************************
920  *
921  *  TX prep functions
922  *
923  **********************************************************************/
924 uint16_t
925 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
926 {
927         int i, ret;
928         uint64_t ol_flags;
929         struct rte_mbuf *m;
930         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
931
932         for (i = 0; i < nb_pkts; i++) {
933                 m = tx_pkts[i];
934                 ol_flags = m->ol_flags;
935
936                 /**
937                  * Check that the packet meets the limit on the number of
938                  * segments.
939                  * NOTE: for ixgbe the limit is always (40 - WTHRESH), for
940                  *       both TSO and non-TSO packets.
941                  */
942
943                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
944                         rte_errno = EINVAL;
945                         return i;
946                 }
947
948                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
949                         rte_errno = ENOTSUP;
950                         return i;
951                 }
952
953 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
954                 ret = rte_validate_tx_offload(m);
955                 if (ret != 0) {
956                         rte_errno = -ret;
957                         return i;
958                 }
959 #endif
960                 ret = rte_net_intel_cksum_prepare(m);
961                 if (ret != 0) {
962                         rte_errno = -ret;
963                         return i;
964                 }
965         }
966
967         return i;
968 }
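/*
 * Usage sketch (illustrative, not part of the driver): applications are
 * expected to reach this prepare stage through the generic ethdev API
 * before transmitting, along the lines of:
 *
 *     uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, n);
 *     / * if nb_prep < n, pkts[nb_prep] was rejected and rte_errno says why * /
 *     uint16_t nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 */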
969
970 /*********************************************************************
971  *
972  *  RX functions
973  *
974  **********************************************************************/
975
976 #define IXGBE_PACKET_TYPE_ETHER                         0X00
977 #define IXGBE_PACKET_TYPE_IPV4                          0X01
978 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
979 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
980 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
981 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
982 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
983 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
984 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
985 #define IXGBE_PACKET_TYPE_IPV6                          0X04
986 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
987 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
988 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
989 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
990 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
991 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
992 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
993 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
994 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
997 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
998 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1001 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1002 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1003 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1004 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1005 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1006 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1007 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1008 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1009
1010 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1027 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1028 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1029 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1030 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1031 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1032 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1033
1034 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1051 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1052 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1053 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1054 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1055 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1056 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1057
1058 #define IXGBE_PACKET_TYPE_MAX               0X80
1059 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1060 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1061
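/*
 * How to read the values above: the macros mirror the packet-type bits used
 * to index the lookup tables below -- 0X01 IPv4, 0X03 IPv4 with options,
 * 0X04 IPv6, 0X0C IPv6 with extension headers, 0X05 IPv6 tunneled in IPv4;
 * adding 0X10 marks TCP, 0X20 UDP and 0X40 SCTP, while in the tunnel set
 * 0X80 distinguishes VXLAN from NVGRE.  For example, 0X43 is SCTP over IPv4
 * with options.
 */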
1062 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1063 static inline uint32_t
1064 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1065 {
1066         /**
1067          * Use two different tables for normal and tunneled packets
1068          * to save space.
1069          */
1070         static const uint32_t
1071                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1072                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1073                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4,
1075                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1077                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1085                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1089                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6,
1091                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1093                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1095                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT,
1099                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1101                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1103                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1111                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1114                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1126                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1135                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1145                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1146                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1147                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1148                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1151                         RTE_PTYPE_L2_ETHER |
1152                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1154         };
1155
1156         static const uint32_t
1157                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1158                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1176                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1178                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1179                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1180                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1181                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1182                         RTE_PTYPE_INNER_L4_TCP,
1183                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1184                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1185                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1186                         RTE_PTYPE_INNER_L4_TCP,
1187                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1193                         RTE_PTYPE_INNER_L4_TCP,
1194                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1195                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1196                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1197                         RTE_PTYPE_INNER_L3_IPV4,
1198                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1199                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1201                         RTE_PTYPE_INNER_L4_UDP,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1205                         RTE_PTYPE_INNER_L4_UDP,
1206                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1207                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1209                         RTE_PTYPE_INNER_L4_SCTP,
1210                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1211                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1216                         RTE_PTYPE_INNER_L4_UDP,
1217                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1220                         RTE_PTYPE_INNER_L4_SCTP,
1221                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1224                         RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1228                         RTE_PTYPE_INNER_L4_SCTP,
1229                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1231                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1232                         RTE_PTYPE_INNER_L4_SCTP,
1233                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1236                         RTE_PTYPE_INNER_L4_TCP,
1237                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1240                         RTE_PTYPE_INNER_L4_UDP,
1241
1242                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1244                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV4,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1250                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1251                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1252                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV6,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4,
1269                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1270                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1271                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1272                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1273                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1274                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1277                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1278                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                         RTE_PTYPE_INNER_L3_IPV4,
1281                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1282                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1285                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1286                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1287                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1288                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1289                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1290                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1293                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1294                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1297                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1298                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1301                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1302                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                         RTE_PTYPE_INNER_L3_IPV4,
1305                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1306                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1309                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1310                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1313                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1314                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1318                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1321                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1322                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1325                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1326                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1329                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1330                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1333         };
1334
1335         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1336                 return RTE_PTYPE_UNKNOWN;
1337
1338         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1339
1340         /* For tunnel packet */
1341         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1342                 /* Remove the tunnel bit to save space. */
1343                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1344                 return ptype_table_tn[pkt_info];
1345         }
1346
1347         /**
1348          * For x550, if the packet is not a tunnel packet,
1349          * the tunnel type bit is set to 0,
1350          * so the 82599 mask can be reused.
1351          */
1352         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1353
1354         return ptype_table[pkt_info];
1355 }
1356
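/*
 * Translate the RSS-type field (low four bits of the descriptor's packet
 * info word) into RSS hash / flow director mbuf offload flags; when IEEE1588
 * support is compiled in, ETQF matches are additionally mapped to the PTP
 * flag.
 */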
1357 static inline uint64_t
1358 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1359 {
1360         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1361                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1362                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1363                 PKT_RX_RSS_HASH, 0, 0, 0,
1364                 0, 0, 0,  PKT_RX_FDIR,
1365         };
1366 #ifdef RTE_LIBRTE_IEEE1588
1367         static uint64_t ip_pkt_etqf_map[8] = {
1368                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1369                 0, 0, 0, 0,
1370         };
1371
1372         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1373                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1374                                 ip_rss_types_map[pkt_info & 0XF];
1375         else
1376                 return ip_rss_types_map[pkt_info & 0XF];
1377 #else
1378         return ip_rss_types_map[pkt_info & 0XF];
1379 #endif
1380 }
1381
1382 static inline uint64_t
1383 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1384 {
1385         uint64_t pkt_flags;
1386
1387         /*
1388          * Check only whether a VLAN tag is present.
1389          * Do not check here whether the L3/L4 Rx checksum was computed by
1390          * the NIC; that is indicated by the rte_eth_rxmode.hw_ip_checksum flag.
1391          */
1392         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1393
1394 #ifdef RTE_LIBRTE_IEEE1588
1395         if (rx_status & IXGBE_RXD_STAT_TMST)
1396                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1397 #endif
1398         return pkt_flags;
1399 }
1400
1401 static inline uint64_t
1402 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1403 {
1404         uint64_t pkt_flags;
1405
1406         /*
1407          * Bit 31: IPE, IPv4 checksum error
1408          * Bit 30: L4I, L4I integrity error
1409          * Bit 30: L4I, L4 integrity error
1410         static uint64_t error_to_pkt_flags_map[4] = {
1411                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1412                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1413                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1414                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1415         };
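        /*
         * The two checksum error bits form a 2-bit index into the table
         * above: 0 = both checksums good, 1 = bad L4 checksum, 2 = bad IP
         * checksum, 3 = both bad.
         */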
1416         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1417                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1418
1419         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1420             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1421                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1422         }
1423
1424         return pkt_flags;
1425 }
1426
1427 /*
1428  * LOOK_AHEAD defines how many desc statuses to check beyond the
1429  * current descriptor.
1430  * It must be a compile-time #define for optimal performance.
1431  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1432  * function only works with LOOK_AHEAD=8.
1433  */
1434 #define LOOK_AHEAD 8
1435 #if (LOOK_AHEAD != 8)
1436 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1437 #endif
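/*
 * Scan the HW ring in batches of LOOK_AHEAD descriptors, converting each
 * completed descriptor into its mbuf and staging it in rxq->rx_stage.
 * Scanning stops at the first batch that is not fully completed; at most
 * RTE_PMD_IXGBE_RX_MAX_BURST descriptors are examined per call.
 */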
1438 static inline int
1439 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1440 {
1441         volatile union ixgbe_adv_rx_desc *rxdp;
1442         struct ixgbe_rx_entry *rxep;
1443         struct rte_mbuf *mb;
1444         uint16_t pkt_len;
1445         uint64_t pkt_flags;
1446         int nb_dd;
1447         uint32_t s[LOOK_AHEAD];
1448         uint32_t pkt_info[LOOK_AHEAD];
1449         int i, j, nb_rx = 0;
1450         uint32_t status;
1451         uint64_t vlan_flags = rxq->vlan_flags;
1452
1453         /* get references to current descriptor and S/W ring entry */
1454         rxdp = &rxq->rx_ring[rxq->rx_tail];
1455         rxep = &rxq->sw_ring[rxq->rx_tail];
1456
1457         status = rxdp->wb.upper.status_error;
1458         /* check to make sure there is at least 1 packet to receive */
1459         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1460                 return 0;
1461
1462         /*
1463          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1464          * reference packets that are ready to be received.
1465          */
1466         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1467              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1468                 /* Read the DD/status word of each descriptor in this batch */
1469                 for (j = 0; j < LOOK_AHEAD; j++)
1470                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1471
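                /* Order the status reads above before the payload reads below. */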
1472                 rte_smp_rmb();
1473
1474                 /* Compute how many status bits were set */
1475                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1476                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1477                         ;
1478
1479                 for (j = 0; j < nb_dd; j++)
1480                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1481                                                        lo_dword.data);
1482
1483                 nb_rx += nb_dd;
1484
1485                 /* Translate descriptor info to mbuf format */
1486                 for (j = 0; j < nb_dd; ++j) {
1487                         mb = rxep[j].mbuf;
1488                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1489                                   rxq->crc_len;
1490                         mb->data_len = pkt_len;
1491                         mb->pkt_len = pkt_len;
1492                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1493
1494                         /* convert descriptor fields to rte mbuf flags */
1495                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1496                                 vlan_flags);
1497                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1498                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1499                                         ((uint16_t)pkt_info[j]);
1500                         mb->ol_flags = pkt_flags;
1501                         mb->packet_type =
1502                                 ixgbe_rxd_pkt_info_to_pkt_type
1503                                         (pkt_info[j], rxq->pkt_type_mask);
1504
1505                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1506                                 mb->hash.rss = rte_le_to_cpu_32(
1507                                     rxdp[j].wb.lower.hi_dword.rss);
1508                         else if (pkt_flags & PKT_RX_FDIR) {
1509                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1510                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1511                                     IXGBE_ATR_HASH_MASK;
1512                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1513                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1514                         }
1515                 }
1516
1517                 /* Move mbuf pointers from the S/W ring to the stage */
1518                 for (j = 0; j < LOOK_AHEAD; ++j) {
1519                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1520                 }
1521
1522                 /* stop as soon as a batch of descriptors is not fully completed */
1523                 if (nb_dd != LOOK_AHEAD)
1524                         break;
1525         }
1526
1527         /* clear software ring entries so we can clean up correctly */
1528         for (i = 0; i < nb_rx; ++i) {
1529                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1530         }
1531
1532
1533         return nb_rx;
1534 }
1535
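/*
 * Bulk-allocate rx_free_thresh mbufs from the queue's mempool and attach
 * them to the group of descriptors ending at the current free trigger.
 * When reset_mbuf is false, the static mbuf fields (next, nb_segs, port)
 * are assumed to still be valid and are left untouched.
 */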
1536 static inline int
1537 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1538 {
1539         volatile union ixgbe_adv_rx_desc *rxdp;
1540         struct ixgbe_rx_entry *rxep;
1541         struct rte_mbuf *mb;
1542         uint16_t alloc_idx;
1543         __le64 dma_addr;
1544         int diag, i;
1545
1546         /* allocate buffers in bulk directly into the S/W ring */
1547         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1548         rxep = &rxq->sw_ring[alloc_idx];
1549         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1550                                     rxq->rx_free_thresh);
1551         if (unlikely(diag != 0))
1552                 return -ENOMEM;
1553
1554         rxdp = &rxq->rx_ring[alloc_idx];
1555         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1556                 /* populate the static rte mbuf fields */
1557                 mb = rxep[i].mbuf;
1558                 if (reset_mbuf) {
1559                         mb->next = NULL;
1560                         mb->nb_segs = 1;
1561                         mb->port = rxq->port_id;
1562                 }
1563
1564                 rte_mbuf_refcnt_set(mb, 1);
1565                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1566
1567                 /* populate the descriptors */
1568                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1569                 rxdp[i].read.hdr_addr = 0;
1570                 rxdp[i].read.pkt_addr = dma_addr;
1571         }
1572
1573         /* update state of internal queue structure */
1574         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1575         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1576                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1577
1578         /* no errors */
1579         return 0;
1580 }
1581
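/*
 * Hand up to nb_pkts previously staged mbufs over to the caller and advance
 * the stage's read position accordingly.
 */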
1582 static inline uint16_t
1583 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1584                          uint16_t nb_pkts)
1585 {
1586         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1587         int i;
1588
1589         /* how many packets are ready to return? */
1590         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1591
1592         /* copy mbuf pointers to the application's packet list */
1593         for (i = 0; i < nb_pkts; ++i)
1594                 rx_pkts[i] = stage[i];
1595
1596         /* update internal queue state */
1597         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1598         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1599
1600         return nb_pkts;
1601 }
1602
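/*
 * Core of the bulk-allocation receive path: drain any mbufs left in the
 * stage, otherwise scan the HW ring, replenish descriptors once the free
 * trigger has been passed (rewinding the scan on allocation failure) and
 * return the freshly staged mbufs.
 */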
1603 static inline uint16_t
1604 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1605              uint16_t nb_pkts)
1606 {
1607         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1608         uint16_t nb_rx = 0;
1609
1610         /* Any previously recv'd pkts will be returned from the Rx stage */
1611         if (rxq->rx_nb_avail)
1612                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1613
1614         /* Scan the H/W ring for packets to receive */
1615         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1616
1617         /* update internal queue state */
1618         rxq->rx_next_avail = 0;
1619         rxq->rx_nb_avail = nb_rx;
1620         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1621
1622         /* if required, allocate new buffers to replenish descriptors */
1623         if (rxq->rx_tail > rxq->rx_free_trigger) {
1624                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1625
1626                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1627                         int i, j;
1628
1629                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1630                                    "queue_id=%u", (unsigned) rxq->port_id,
1631                                    (unsigned) rxq->queue_id);
1632
1633                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1634                                 rxq->rx_free_thresh;
1635
1636                         /*
1637                          * Need to rewind any previous receives if we cannot
1638                          * allocate new buffers to replenish the old ones.
1639                          */
1640                         rxq->rx_nb_avail = 0;
1641                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1642                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1643                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1644
1645                         return 0;
1646                 }
1647
1648                 /* update tail pointer */
1649                 rte_wmb();
1650                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1651                                             cur_free_trigger);
1652         }
1653
1654         if (rxq->rx_tail >= rxq->nb_rx_desc)
1655                 rxq->rx_tail = 0;
1656
1657         /* received any packets this loop? */
1658         if (rxq->rx_nb_avail)
1659                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1660
1661         return 0;
1662 }
1663
1664 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
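/*
 * Example: with RTE_PMD_IXGBE_RX_MAX_BURST at its current value of 32, a
 * request for 100 packets is served as chunks of 32, 32, 32 and 4, stopping
 * early as soon as a chunk returns fewer packets than requested.
 */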
1665 uint16_t
1666 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1667                            uint16_t nb_pkts)
1668 {
1669         uint16_t nb_rx;
1670
1671         if (unlikely(nb_pkts == 0))
1672                 return 0;
1673
1674         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1675                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1676
1677         /* request is relatively large, chunk it up */
1678         nb_rx = 0;
1679         while (nb_pkts) {
1680                 uint16_t ret, n;
1681
1682                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1683                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1684                 nb_rx = (uint16_t)(nb_rx + ret);
1685                 nb_pkts = (uint16_t)(nb_pkts - ret);
1686                 if (ret < n)
1687                         break;
1688         }
1689
1690         return nb_rx;
1691 }
1692
1693 uint16_t
1694 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1695                 uint16_t nb_pkts)
1696 {
1697         struct ixgbe_rx_queue *rxq;
1698         volatile union ixgbe_adv_rx_desc *rx_ring;
1699         volatile union ixgbe_adv_rx_desc *rxdp;
1700         struct ixgbe_rx_entry *sw_ring;
1701         struct ixgbe_rx_entry *rxe;
1702         struct rte_mbuf *rxm;
1703         struct rte_mbuf *nmb;
1704         union ixgbe_adv_rx_desc rxd;
1705         uint64_t dma_addr;
1706         uint32_t staterr;
1707         uint32_t pkt_info;
1708         uint16_t pkt_len;
1709         uint16_t rx_id;
1710         uint16_t nb_rx;
1711         uint16_t nb_hold;
1712         uint64_t pkt_flags;
1713         uint64_t vlan_flags;
1714
1715         nb_rx = 0;
1716         nb_hold = 0;
1717         rxq = rx_queue;
1718         rx_id = rxq->rx_tail;
1719         rx_ring = rxq->rx_ring;
1720         sw_ring = rxq->sw_ring;
1721         vlan_flags = rxq->vlan_flags;
1722         while (nb_rx < nb_pkts) {
1723                 /*
1724                  * The order of operations here is important as the DD status
1725                  * bit must not be read after any other descriptor fields.
1726                  * rx_ring and rxdp are pointing to volatile data so the order
1727                  * of accesses cannot be reordered by the compiler. If they were
1728                  * not volatile, they could be reordered which could lead to
1729                  * using invalid descriptor fields when read from rxd.
1730                  */
1731                 rxdp = &rx_ring[rx_id];
1732                 staterr = rxdp->wb.upper.status_error;
1733                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1734                         break;
1735                 rxd = *rxdp;
1736
1737                 /*
1738                  * End of packet.
1739                  *
1740                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1741                  * is likely to be invalid and to be dropped by the various
1742                  * validation checks performed by the network stack.
1743                  *
1744                  * Allocate a new mbuf to replenish the RX ring descriptor.
1745                  * If the allocation fails:
1746                  *    - arrange for that RX descriptor to be the first one
1747                  *      being parsed the next time the receive function is
1748                  *      invoked [on the same queue].
1749                  *
1750                  *    - Stop parsing the RX ring and return immediately.
1751                  *
1752                  * This policy does not drop the packet received in the RX
1753                  * descriptor for which the allocation of a new mbuf failed.
1754                  * Thus, it allows that packet to be retrieved later, once
1755                  * mbufs have been freed in the meantime.
1756                  * As a side effect, holding RX descriptors instead of
1757                  * systematically giving them back to the NIC may lead to
1758                  * RX ring exhaustion situations.
1759                  * However, the NIC can gracefully prevent such situations
1760                  * from happening by sending specific "back-pressure" flow
1761                  * control frames to its peer(s).
1762                  */
1763                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1764                            "ext_err_stat=0x%08x pkt_len=%u",
1765                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1766                            (unsigned) rx_id, (unsigned) staterr,
1767                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1768
1769                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1770                 if (nmb == NULL) {
1771                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1772                                    "queue_id=%u", (unsigned) rxq->port_id,
1773                                    (unsigned) rxq->queue_id);
1774                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1775                         break;
1776                 }
1777
1778                 nb_hold++;
1779                 rxe = &sw_ring[rx_id];
1780                 rx_id++;
1781                 if (rx_id == rxq->nb_rx_desc)
1782                         rx_id = 0;
1783
1784                 /* Prefetch next mbuf while processing current one. */
1785                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1786
1787                 /*
1788                  * When next RX descriptor is on a cache-line boundary,
1789                  * prefetch the next 4 RX descriptors and the next 8 pointers
1790                  * to mbufs.
1791                  */
1792                 if ((rx_id & 0x3) == 0) {
1793                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1794                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1795                 }
1796
1797                 rxm = rxe->mbuf;
1798                 rxe->mbuf = nmb;
1799                 dma_addr =
1800                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1801                 rxdp->read.hdr_addr = 0;
1802                 rxdp->read.pkt_addr = dma_addr;
1803
1804                 /*
1805                  * Initialize the returned mbuf.
1806                  * 1) setup generic mbuf fields:
1807                  *    - number of segments,
1808                  *    - next segment,
1809                  *    - packet length,
1810                  *    - RX port identifier.
1811                  * 2) integrate hardware offload data, if any:
1812                  *    - RSS flag & hash,
1813                  *    - IP checksum flag,
1814                  *    - VLAN TCI, if any,
1815                  *    - error flags.
1816                  */
1817                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1818                                       rxq->crc_len);
1819                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1820                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1821                 rxm->nb_segs = 1;
1822                 rxm->next = NULL;
1823                 rxm->pkt_len = pkt_len;
1824                 rxm->data_len = pkt_len;
1825                 rxm->port = rxq->port_id;
1826
1827                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1828                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1829                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1830
1831                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1832                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1833                 pkt_flags = pkt_flags |
1834                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1835                 rxm->ol_flags = pkt_flags;
1836                 rxm->packet_type =
1837                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1838                                                        rxq->pkt_type_mask);
1839
1840                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1841                         rxm->hash.rss = rte_le_to_cpu_32(
1842                                                 rxd.wb.lower.hi_dword.rss);
1843                 else if (pkt_flags & PKT_RX_FDIR) {
1844                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1845                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1846                                         IXGBE_ATR_HASH_MASK;
1847                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1848                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1849                 }
1850                 /*
1851                  * Store the mbuf address into the next entry of the array
1852                  * of returned packets.
1853                  */
1854                 rx_pkts[nb_rx++] = rxm;
1855         }
1856         rxq->rx_tail = rx_id;
1857
1858         /*
1859          * If the number of free RX descriptors is greater than the RX free
1860          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1861          * register.
1862          * Update the RDT with the value of the last processed RX descriptor
1863          * minus 1, to guarantee that the RDT register is never equal to the
1864          * RDH register, which creates a "full" ring situation from the
1865          * hardware point of view...
1866          */
1867         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1868         if (nb_hold > rxq->rx_free_thresh) {
1869                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1870                            "nb_hold=%u nb_rx=%u",
1871                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1872                            (unsigned) rx_id, (unsigned) nb_hold,
1873                            (unsigned) nb_rx);
1874                 rx_id = (uint16_t) ((rx_id == 0) ?
1875                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1876                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1877                 nb_hold = 0;
1878         }
1879         rxq->nb_rx_hold = nb_hold;
1880         return nb_rx;
1881 }
1882
1883 /**
1884  * Detect an RSC descriptor.
1885  */
1886 static inline uint32_t
1887 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1888 {
1889         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1890                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1891 }
1892
1893 /**
1894  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1895  *
1896  * Fill the following info in the HEAD buffer of the Rx cluster:
1897  *    - RX port identifier
1898  *    - hardware offload data, if any:
1899  *      - RSS flag & hash
1900  *      - IP checksum flag
1901  *      - VLAN TCI, if any
1902  *      - error flags
1903  * @head HEAD of the packet cluster
1904  * @desc HW descriptor to get data from
1905  * @rxq Pointer to the Rx queue
1906  */
1907 static inline void
1908 ixgbe_fill_cluster_head_buf(
1909         struct rte_mbuf *head,
1910         union ixgbe_adv_rx_desc *desc,
1911         struct ixgbe_rx_queue *rxq,
1912         uint32_t staterr)
1913 {
1914         uint32_t pkt_info;
1915         uint64_t pkt_flags;
1916
1917         head->port = rxq->port_id;
1918
1919         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1920          * set in the pkt_flags field.
1921          */
1922         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1923         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1924         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1925         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1926         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1927         head->ol_flags = pkt_flags;
1928         head->packet_type =
1929                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1930
1931         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1932                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1933         else if (pkt_flags & PKT_RX_FDIR) {
1934                 head->hash.fdir.hash =
1935                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1936                                                           & IXGBE_ATR_HASH_MASK;
1937                 head->hash.fdir.id =
1938                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1939         }
1940 }
1941
1942 /**
1943  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1944  *
1945  * @rx_queue Rx queue handle
1946  * @rx_pkts table of received packets
1947  * @nb_pkts size of rx_pkts table
1948  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1949  *
1950  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1951  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1952  *
1953  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1954  * 1) When non-EOP RSC completion arrives:
1955  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1956  *       segment's data length.
1957  *    b) Set the "next" pointer of the current segment to point to the segment
1958  *       at the NEXTP index.
1959  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1960  *       in the sw_rsc_ring.
1961  * 2) When EOP arrives we just update the cluster's total length and offload
1962  *    flags and deliver the cluster up to the upper layers. In our case - put it
1963  *    in the rx_pkts table.
1964  *
1965  * Returns the number of received packets/clusters (according to the "bulk
1966  * receive" interface).
1967  */
1968 static inline uint16_t
1969 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1970                     bool bulk_alloc)
1971 {
1972         struct ixgbe_rx_queue *rxq = rx_queue;
1973         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1974         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1975         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1976         uint16_t rx_id = rxq->rx_tail;
1977         uint16_t nb_rx = 0;
1978         uint16_t nb_hold = rxq->nb_rx_hold;
1979         uint16_t prev_id = rxq->rx_tail;
1980
1981         while (nb_rx < nb_pkts) {
1982                 bool eop;
1983                 struct ixgbe_rx_entry *rxe;
1984                 struct ixgbe_scattered_rx_entry *sc_entry;
1985                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1986                 struct ixgbe_rx_entry *next_rxe = NULL;
1987                 struct rte_mbuf *first_seg;
1988                 struct rte_mbuf *rxm;
1989                 struct rte_mbuf *nmb;
1990                 union ixgbe_adv_rx_desc rxd;
1991                 uint16_t data_len;
1992                 uint16_t next_id;
1993                 volatile union ixgbe_adv_rx_desc *rxdp;
1994                 uint32_t staterr;
1995
1996 next_desc:
1997                 /*
1998                  * The code in this whole file uses the volatile pointer to
1999                  * ensure the read ordering of the status and the rest of the
2000                  * descriptor fields (on the compiler level only!!!). This is so
2001                  * UGLY - why not just use the compiler barrier instead? DPDK
2002                  * even has the rte_compiler_barrier() for that.
2003                  *
2004                  * But most importantly this is just wrong because this doesn't
2005                  * ensure memory ordering in a general case at all. For
2006                  * instance, DPDK is supposed to work on Power CPUs where
2007                  * compiler barrier may just not be enough!
2008                  *
2009                  * I tried to write only this function properly to have a
2010                  * starting point (as a part of an LRO/RSC series) but the
2011                  * compiler cursed at me when I tried to cast away the
2012                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2013                  * keeping it the way it is for now.
2014                  *
2015                  * The code in this file is broken in so many other places and
2016                  * will just not work on a big endian CPU anyway therefore the
2017                  * lines below will have to be revisited together with the rest
2018                  * of the ixgbe PMD.
2019                  *
2020                  * TODO:
2021                  *    - Get rid of "volatile" crap and let the compiler do its
2022                  *      job.
2023                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2024                  *      memory ordering below.
2025                  */
2026                 rxdp = &rx_ring[rx_id];
2027                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2028
2029                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2030                         break;
2031
2032                 rxd = *rxdp;
2033
2034                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2035                                   "staterr=0x%x data_len=%u",
2036                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2037                            rte_le_to_cpu_16(rxd.wb.upper.length));
2038
2039                 if (!bulk_alloc) {
2040                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2041                         if (nmb == NULL) {
2042                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2043                                                   "port_id=%u queue_id=%u",
2044                                            rxq->port_id, rxq->queue_id);
2045
2046                                 rte_eth_devices[rxq->port_id].data->
2047                                                         rx_mbuf_alloc_failed++;
2048                                 break;
2049                         }
2050                 } else if (nb_hold > rxq->rx_free_thresh) {
2051                         uint16_t next_rdt = rxq->rx_free_trigger;
2052
2053                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2054                                 rte_wmb();
2055                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2056                                                             next_rdt);
2057                                 nb_hold -= rxq->rx_free_thresh;
2058                         } else {
2059                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2060                                                   "port_id=%u queue_id=%u",
2061                                            rxq->port_id, rxq->queue_id);
2062
2063                                 rte_eth_devices[rxq->port_id].data->
2064                                                         rx_mbuf_alloc_failed++;
2065                                 break;
2066                         }
2067                 }
2068
2069                 nb_hold++;
2070                 rxe = &sw_ring[rx_id];
2071                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2072
2073                 next_id = rx_id + 1;
2074                 if (next_id == rxq->nb_rx_desc)
2075                         next_id = 0;
2076
2077                 /* Prefetch next mbuf while processing current one. */
2078                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2079
2080                 /*
2081                  * When next RX descriptor is on a cache-line boundary,
2082                  * prefetch the next 4 RX descriptors and the next 4 pointers
2083                  * to mbufs.
2084                  */
2085                 if ((next_id & 0x3) == 0) {
2086                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2087                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2088                 }
2089
2090                 rxm = rxe->mbuf;
2091
2092                 if (!bulk_alloc) {
2093                         __le64 dma =
2094                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2095                         /*
2096                          * Update RX descriptor with the physical address of the
2097                          * new data buffer of the new allocated mbuf.
2098                          * new data buffer of the newly allocated mbuf.
2099                         rxe->mbuf = nmb;
2100
2101                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2102                         rxdp->read.hdr_addr = 0;
2103                         rxdp->read.pkt_addr = dma;
2104                 } else
2105                         rxe->mbuf = NULL;
2106
2107                 /*
2108                  * Set data length & data buffer address of mbuf.
2109                  */
2110                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2111                 rxm->data_len = data_len;
2112
2113                 if (!eop) {
2114                         uint16_t nextp_id;
2115                         /*
2116                          * Get next descriptor index:
2117                          *  - For RSC it's in the NEXTP field.
2118                          *  - For a scattered packet - it's just a following
2119                          *    descriptor.
2120                          */
2121                         if (ixgbe_rsc_count(&rxd))
2122                                 nextp_id =
2123                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2124                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2125                         else
2126                                 nextp_id = next_id;
2127
2128                         next_sc_entry = &sw_sc_ring[nextp_id];
2129                         next_rxe = &sw_ring[nextp_id];
2130                         rte_ixgbe_prefetch(next_rxe);
2131                 }
2132
2133                 sc_entry = &sw_sc_ring[rx_id];
2134                 first_seg = sc_entry->fbuf;
2135                 sc_entry->fbuf = NULL;
2136
2137                 /*
2138                  * If this is the first buffer of the received packet,
2139                  * set the pointer to the first mbuf of the packet and
2140                  * initialize its context.
2141                  * Otherwise, update the total length and the number of segments
2142                  * of the current scattered packet, and update the pointer to
2143                  * the last mbuf of the current packet.
2144                  */
2145                 if (first_seg == NULL) {
2146                         first_seg = rxm;
2147                         first_seg->pkt_len = data_len;
2148                         first_seg->nb_segs = 1;
2149                 } else {
2150                         first_seg->pkt_len += data_len;
2151                         first_seg->nb_segs++;
2152                 }
2153
2154                 prev_id = rx_id;
2155                 rx_id = next_id;
2156
2157                 /*
2158                  * If this is not the last buffer of the received packet, update
2159                  * the pointer to the first mbuf at the NEXTP entry in the
2160                  * sw_sc_ring and continue to parse the RX ring.
2161                  */
2162                 if (!eop && next_rxe) {
2163                         rxm->next = next_rxe->mbuf;
2164                         next_sc_entry->fbuf = first_seg;
2165                         goto next_desc;
2166                 }
2167
2168                 /*
2169                  * This is the last buffer of the received packet - return
2170                  * the current cluster to the user.
2171                  */
2172                 rxm->next = NULL;
2173
2174                 /* Initialize the first mbuf of the returned packet */
2175                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2176
2177                 /*
2178                  * Deal with the case when HW CRC stripping is disabled.
2179                  * That can't happen when LRO is enabled, but it still can
2180                  * happen in scattered RX mode.
2181                  */
2182                 first_seg->pkt_len -= rxq->crc_len;
2183                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2184                         struct rte_mbuf *lp;
2185
2186                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2187                                 ;
2188
2189                         first_seg->nb_segs--;
2190                         lp->data_len -= rxq->crc_len - rxm->data_len;
2191                         lp->next = NULL;
2192                         rte_pktmbuf_free_seg(rxm);
2193                 } else
2194                         rxm->data_len -= rxq->crc_len;
2195
2196                 /* Prefetch data of first segment, if configured to do so. */
2197                 rte_packet_prefetch((char *)first_seg->buf_addr +
2198                         first_seg->data_off);
2199
2200                 /*
2201                  * Store the mbuf address into the next entry of the array
2202                  * of returned packets.
2203                  */
2204                 rx_pkts[nb_rx++] = first_seg;
2205         }
2206
2207         /*
2208          * Record index of the next RX descriptor to probe.
2209          */
2210         rxq->rx_tail = rx_id;
2211
2212         /*
2213          * If the number of free RX descriptors is greater than the RX free
2214          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2215          * register.
2216          * Update the RDT with the value of the last processed RX descriptor
2217          * minus 1, to guarantee that the RDT register is never equal to the
2218          * RDH register, which creates a "full" ring situation from the
2219          * hardware point of view...
2220          */
2221         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2222                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2223                            "nb_hold=%u nb_rx=%u",
2224                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2225
2226                 rte_wmb();
2227                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2228                 nb_hold = 0;
2229         }
2230
2231         rxq->nb_rx_hold = nb_hold;
2232         return nb_rx;
2233 }
2234
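/*
 * Thin wrappers around ixgbe_recv_pkts_lro() selecting single vs. bulk mbuf
 * allocation; one of them is installed as the Rx burst handler by
 * ixgbe_set_rx_function() when RSC or scattered Rx is in use.
 */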
2235 uint16_t
2236 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2237                                  uint16_t nb_pkts)
2238 {
2239         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2240 }
2241
2242 uint16_t
2243 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2244                                uint16_t nb_pkts)
2245 {
2246         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2247 }
2248
2249 /*********************************************************************
2250  *
2251  *  Queue management functions
2252  *
2253  **********************************************************************/
2254
2255 static void __attribute__((cold))
2256 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2257 {
2258         unsigned i;
2259
2260         if (txq->sw_ring != NULL) {
2261                 for (i = 0; i < txq->nb_tx_desc; i++) {
2262                         if (txq->sw_ring[i].mbuf != NULL) {
2263                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2264                                 txq->sw_ring[i].mbuf = NULL;
2265                         }
2266                 }
2267         }
2268 }
2269
2270 static void __attribute__((cold))
2271 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2272 {
2273         if (txq != NULL &&
2274             txq->sw_ring != NULL)
2275                 rte_free(txq->sw_ring);
2276 }
2277
2278 static void __attribute__((cold))
2279 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2280 {
2281         if (txq != NULL && txq->ops != NULL) {
2282                 txq->ops->release_mbufs(txq);
2283                 txq->ops->free_swring(txq);
2284                 rte_free(txq);
2285         }
2286 }
2287
2288 void __attribute__((cold))
2289 ixgbe_dev_tx_queue_release(void *txq)
2290 {
2291         ixgbe_tx_queue_release(txq);
2292 }
2293
2294 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2295 static void __attribute__((cold))
2296 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2297 {
2298         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2299         struct ixgbe_tx_entry *txe = txq->sw_ring;
2300         uint16_t prev, i;
2301
2302         /* Zero out HW ring memory */
2303         for (i = 0; i < txq->nb_tx_desc; i++) {
2304                 txq->tx_ring[i] = zeroed_desc;
2305         }
2306
2307         /* Initialize SW ring entries */
2308         prev = (uint16_t) (txq->nb_tx_desc - 1);
2309         for (i = 0; i < txq->nb_tx_desc; i++) {
2310                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2311
2312                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2313                 txe[i].mbuf = NULL;
2314                 txe[i].last_id = i;
2315                 txe[prev].next_id = i;
2316                 prev = i;
2317         }
2318
2319         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2320         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2321
2322         txq->tx_tail = 0;
2323         txq->nb_tx_used = 0;
2324         /*
2325          * Always allow 1 descriptor to be un-allocated to avoid
2326          * a H/W race condition
2327          */
2328         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2329         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2330         txq->ctx_curr = 0;
2331         memset((void *)&txq->ctx_cache, 0,
2332                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2333 }
2334
2335 static const struct ixgbe_txq_ops def_txq_ops = {
2336         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2337         .free_swring = ixgbe_tx_free_swring,
2338         .reset = ixgbe_reset_tx_queue,
2339 };
2340
2341 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2342  * the queue parameters. Used in tx_queue_setup by the primary process and
2343  * then in dev_init by a secondary process when attaching to an existing ethdev.
2344  */
2345 void __attribute__((cold))
2346 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2347 {
2348         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2349         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2350                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2351                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2352                 dev->tx_pkt_prepare = NULL;
2353 #ifdef RTE_IXGBE_INC_VECTOR
2354                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2355                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2356                                         ixgbe_txq_vec_setup(txq) == 0)) {
2357                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2358                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2359                 } else
2360 #endif
2361                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2362         } else {
2363                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2364                 PMD_INIT_LOG(DEBUG,
2365                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2366                                 (unsigned long)txq->txq_flags,
2367                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2368                 PMD_INIT_LOG(DEBUG,
2369                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2370                                 (unsigned long)txq->tx_rs_thresh,
2371                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2372                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2373                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2374         }
2375 }
2376
2377 int __attribute__((cold))
2378 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2379                          uint16_t queue_idx,
2380                          uint16_t nb_desc,
2381                          unsigned int socket_id,
2382                          const struct rte_eth_txconf *tx_conf)
2383 {
2384         const struct rte_memzone *tz;
2385         struct ixgbe_tx_queue *txq;
2386         struct ixgbe_hw     *hw;
2387         uint16_t tx_rs_thresh, tx_free_thresh;
2388
2389         PMD_INIT_FUNC_TRACE();
2390         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2391
2392         /*
2393          * Validate number of transmit descriptors.
2394          * It must not exceed the hardware maximum and must be a multiple
2395          * of IXGBE_TXD_ALIGN.
2396          */
2397         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2398                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2399                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2400                 return -EINVAL;
2401         }
2402
2403         /*
2404          * The following two parameters control the setting of the RS bit on
2405          * transmit descriptors.
2406          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2407          * descriptors have been used.
2408          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2409          * descriptors are used or if the number of descriptors required
2410          * to transmit a packet is greater than the number of free TX
2411          * descriptors.
2412          * The following constraints must be satisfied:
2413          *  tx_rs_thresh must be greater than 0.
2414          *  tx_rs_thresh must be less than the size of the ring minus 2.
2415          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2416          *  tx_rs_thresh must be a divisor of the ring size.
2417          *  tx_free_thresh must be greater than 0.
2418          *  tx_free_thresh must be less than the size of the ring minus 3.
2419          * One descriptor in the TX ring is used as a sentinel to avoid a
2420          * H/W race condition, hence the maximum threshold constraints.
2421          * When set to zero use default values.
2422          */
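        /*
         * Worked example (illustrative, not used by the code): with a
         * 512-entry ring and tx_rs_thresh = tx_free_thresh = 32 (assuming
         * DEFAULT_TX_RS_THRESH/DEFAULT_TX_FREE_THRESH are 32), all of the
         * constraints above hold: 32 > 0, 32 < 510, 32 <= 32,
         * 512 % 32 == 0 and 32 < 509.
         */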
2423         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2424                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2425         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2426                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2427         if (tx_rs_thresh >= (nb_desc - 2)) {
2428                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2429                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2430                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2431                         (int)dev->data->port_id, (int)queue_idx);
2432                 return -(EINVAL);
2433         }
2434         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2435                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2436                         "(tx_rs_thresh=%u port=%d queue=%d)",
2437                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2438                         (int)dev->data->port_id, (int)queue_idx);
2439                 return -(EINVAL);
2440         }
2441         if (tx_free_thresh >= (nb_desc - 3)) {
2442                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2443                              "number of TX descriptors minus 3. "
2444                              "(tx_free_thresh=%u "
2445                              "port=%d queue=%d)",
2446                              (unsigned int)tx_free_thresh,
2447                              (int)dev->data->port_id, (int)queue_idx);
2448                 return -(EINVAL);
2449         }
2450         if (tx_rs_thresh > tx_free_thresh) {
2451                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2452                              "tx_free_thresh. (tx_free_thresh=%u "
2453                              "tx_rs_thresh=%u port=%d queue=%d)",
2454                              (unsigned int)tx_free_thresh,
2455                              (unsigned int)tx_rs_thresh,
2456                              (int)dev->data->port_id,
2457                              (int)queue_idx);
2458                 return -(EINVAL);
2459         }
2460         if ((nb_desc % tx_rs_thresh) != 0) {
2461                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2462                              "number of TX descriptors. (tx_rs_thresh=%u "
2463                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2464                              (int)dev->data->port_id, (int)queue_idx);
2465                 return -(EINVAL);
2466         }
2467
2468         /*
2469          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2470          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2471          * by the NIC and all descriptors are written back after the NIC
2472          * accumulates WTHRESH descriptors.
2473          */
2474         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2475                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2476                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2477                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2478                              (int)dev->data->port_id, (int)queue_idx);
2479                 return -(EINVAL);
2480         }
2481
2482         /* Free memory prior to re-allocation if needed... */
2483         if (dev->data->tx_queues[queue_idx] != NULL) {
2484                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2485                 dev->data->tx_queues[queue_idx] = NULL;
2486         }
2487
2488         /* First allocate the tx queue data structure */
2489         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2490                                  RTE_CACHE_LINE_SIZE, socket_id);
2491         if (txq == NULL)
2492                 return -ENOMEM;
2493
2494         /*
2495          * Allocate TX ring hardware descriptors. A memzone large enough to
2496          * handle the maximum ring size is allocated in order to allow for
2497          * resizing in later calls to the queue setup function.
2498          */
2499         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2500                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2501                         IXGBE_ALIGN, socket_id);
2502         if (tz == NULL) {
2503                 ixgbe_tx_queue_release(txq);
2504                 return -ENOMEM;
2505         }
2506
2507         txq->nb_tx_desc = nb_desc;
2508         txq->tx_rs_thresh = tx_rs_thresh;
2509         txq->tx_free_thresh = tx_free_thresh;
2510         txq->pthresh = tx_conf->tx_thresh.pthresh;
2511         txq->hthresh = tx_conf->tx_thresh.hthresh;
2512         txq->wthresh = tx_conf->tx_thresh.wthresh;
2513         txq->queue_id = queue_idx;
2514         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2515                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2516         txq->port_id = dev->data->port_id;
2517         txq->txq_flags = tx_conf->txq_flags;
2518         txq->ops = &def_txq_ops;
2519         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2520
2521         /*
2522          * Set the TX tail register address: VFTDT for a virtual function, TDT otherwise
2523          */
2524         if (hw->mac.type == ixgbe_mac_82599_vf ||
2525             hw->mac.type == ixgbe_mac_X540_vf ||
2526             hw->mac.type == ixgbe_mac_X550_vf ||
2527             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2528             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2529                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2530         else
2531                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2532
2533         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2534         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2535
2536         /* Allocate software ring */
2537         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2538                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2539                                 RTE_CACHE_LINE_SIZE, socket_id);
2540         if (txq->sw_ring == NULL) {
2541                 ixgbe_tx_queue_release(txq);
2542                 return -ENOMEM;
2543         }
2544         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2545                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2546
2547         /* set up vector or scalar TX function as appropriate */
2548         ixgbe_set_tx_function(dev, txq);
2549
2550         txq->ops->reset(txq);
2551
2552         dev->data->tx_queues[queue_idx] = txq;
2553
2554
2555         return 0;
2556 }
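/*
 * Usage sketch (illustrative only): applications normally reach the setup
 * function above through the generic ethdev API rather than calling it
 * directly, e.g.
 *
 *     struct rte_eth_txconf txconf = { .tx_rs_thresh = 32,
 *                                      .tx_free_thresh = 32 };
 *     int ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                      &txconf);
 *
 * The threshold values shown are assumptions chosen to satisfy the checks
 * documented above; passing a NULL txconf selects the driver defaults.
 */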
2557
2558 /**
2559  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2560  *
2561  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2562  * in the sw_rsc_ring is not set to NULL but rather points to the next
2563  * mbuf of this RSC aggregation (that has not been completed yet and still
2564  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2565  * just free the first "nb_segs" segments of the cluster explicitly, one at a
2566  * time, with rte_pktmbuf_free_seg().
2567  *
2568  * @m scattered cluster head
2569  */
2570 static void __attribute__((cold))
2571 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2572 {
2573         uint8_t i, nb_segs = m->nb_segs;
2574         struct rte_mbuf *next_seg;
2575
2576         for (i = 0; i < nb_segs; i++) {
2577                 next_seg = m->next;
2578                 rte_pktmbuf_free_seg(m);
2579                 m = next_seg;
2580         }
2581 }
2582
2583 static void __attribute__((cold))
2584 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2585 {
2586         unsigned i;
2587
2588 #ifdef RTE_IXGBE_INC_VECTOR
2589         /* SSE Vector driver has a different way of releasing mbufs. */
2590         if (rxq->rx_using_sse) {
2591                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2592                 return;
2593         }
2594 #endif
2595
2596         if (rxq->sw_ring != NULL) {
2597                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2598                         if (rxq->sw_ring[i].mbuf != NULL) {
2599                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2600                                 rxq->sw_ring[i].mbuf = NULL;
2601                         }
2602                 }
2603                 if (rxq->rx_nb_avail) {
2604                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2605                                 struct rte_mbuf *mb;
2606
2607                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2608                                 rte_pktmbuf_free_seg(mb);
2609                         }
2610                         rxq->rx_nb_avail = 0;
2611                 }
2612         }
2613
2614         if (rxq->sw_sc_ring)
2615                 for (i = 0; i < rxq->nb_rx_desc; i++)
2616                         if (rxq->sw_sc_ring[i].fbuf) {
2617                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2618                                 rxq->sw_sc_ring[i].fbuf = NULL;
2619                         }
2620 }
2621
2622 static void __attribute__((cold))
2623 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2624 {
2625         if (rxq != NULL) {
2626                 ixgbe_rx_queue_release_mbufs(rxq);
2627                 rte_free(rxq->sw_ring);
2628                 rte_free(rxq->sw_sc_ring);
2629                 rte_free(rxq);
2630         }
2631 }
2632
2633 void __attribute__((cold))
2634 ixgbe_dev_rx_queue_release(void *rxq)
2635 {
2636         ixgbe_rx_queue_release(rxq);
2637 }
2638
2639 /*
2640  * Check if Rx Burst Bulk Alloc function can be used.
2641  * Return
2642  *        0: the preconditions are satisfied and the bulk allocation function
2643  *           can be used.
2644  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2645  *           function must be used.
2646  */
2647 static inline int __attribute__((cold))
2648 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2649 {
2650         int ret = 0;
2651
2652         /*
2653          * Make sure the following pre-conditions are satisfied:
2654          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2655          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2656          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2657          * Scattered packets are not supported.  This should be checked
2658          * outside of this function.
2659          */
2660         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2661                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2662                              "rxq->rx_free_thresh=%d, "
2663                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2664                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2665                 ret = -EINVAL;
2666         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2667                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2668                              "rxq->rx_free_thresh=%d, "
2669                              "rxq->nb_rx_desc=%d",
2670                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2671                 ret = -EINVAL;
2672         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2673                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2674                              "rxq->nb_rx_desc=%d, "
2675                              "rxq->rx_free_thresh=%d",
2676                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2677                 ret = -EINVAL;
2678         }
2679
2680         return ret;
2681 }
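/*
 * Worked example (illustrative): assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32
 * (its usual value), a queue with nb_rx_desc = 128 and rx_free_thresh = 32
 * passes all three checks: 32 >= 32, 32 < 128 and 128 % 32 == 0, so the
 * bulk allocation Rx path stays enabled for that queue.
 */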
2682
2683 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2684 static void __attribute__((cold))
2685 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2686 {
2687         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2688         unsigned i;
2689         uint16_t len = rxq->nb_rx_desc;
2690
2691         /*
2692          * By default, the Rx queue setup function allocates enough memory for
2693          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2694          * extra memory at the end of the descriptor ring to be zero'd out.
2695          */
2696         if (adapter->rx_bulk_alloc_allowed)
2697                 /* zero out extra memory */
2698                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2699
2700         /*
2701          * Zero out HW ring memory. Zero out extra memory at the end of
2702          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2703          * reads extra memory as zeros.
2704          */
2705         for (i = 0; i < len; i++) {
2706                 rxq->rx_ring[i] = zeroed_desc;
2707         }
2708
2709         /*
2710          * Initialize the extra software ring entries. Space for these extra
2711          * entries is always allocated.
2712          */
2713         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2714         for (i = rxq->nb_rx_desc; i < len; ++i) {
2715                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2716         }
2717
2718         rxq->rx_nb_avail = 0;
2719         rxq->rx_next_avail = 0;
2720         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2721         rxq->rx_tail = 0;
2722         rxq->nb_rx_hold = 0;
2723         rxq->pkt_first_seg = NULL;
2724         rxq->pkt_last_seg = NULL;
2725
2726 #ifdef RTE_IXGBE_INC_VECTOR
2727         rxq->rxrearm_start = 0;
2728         rxq->rxrearm_nb = 0;
2729 #endif
2730 }
2731
2732 int __attribute__((cold))
2733 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2734                          uint16_t queue_idx,
2735                          uint16_t nb_desc,
2736                          unsigned int socket_id,
2737                          const struct rte_eth_rxconf *rx_conf,
2738                          struct rte_mempool *mp)
2739 {
2740         const struct rte_memzone *rz;
2741         struct ixgbe_rx_queue *rxq;
2742         struct ixgbe_hw     *hw;
2743         uint16_t len;
2744         struct ixgbe_adapter *adapter =
2745                 (struct ixgbe_adapter *)dev->data->dev_private;
2746
2747         PMD_INIT_FUNC_TRACE();
2748         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2749
2750         /*
2751          * Validate number of receive descriptors.
2752          * It must not exceed the hardware maximum and must be a multiple
2753          * of IXGBE_RXD_ALIGN.
2754          */
2755         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2756                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2757                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2758                 return -EINVAL;
2759         }
2760
2761         /* Free memory prior to re-allocation if needed... */
2762         if (dev->data->rx_queues[queue_idx] != NULL) {
2763                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2764                 dev->data->rx_queues[queue_idx] = NULL;
2765         }
2766
2767         /* First allocate the rx queue data structure */
2768         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2769                                  RTE_CACHE_LINE_SIZE, socket_id);
2770         if (rxq == NULL)
2771                 return -ENOMEM;
2772         rxq->mb_pool = mp;
2773         rxq->nb_rx_desc = nb_desc;
2774         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2775         rxq->queue_id = queue_idx;
2776         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2777                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2778         rxq->port_id = dev->data->port_id;
2779         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2780                                                         0 : ETHER_CRC_LEN);
2781         rxq->drop_en = rx_conf->rx_drop_en;
2782         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2783
2784         /*
2785          * The packet type in RX descriptor is different for different NICs.
2786          * Some bits are used for x550 but reserved for other NICs.
2787          * So set different masks for different NICs.
2788          */
2789         if (hw->mac.type == ixgbe_mac_X550 ||
2790             hw->mac.type == ixgbe_mac_X550EM_x ||
2791             hw->mac.type == ixgbe_mac_X550EM_a ||
2792             hw->mac.type == ixgbe_mac_X550_vf ||
2793             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2794             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2795                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2796         else
2797                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2798
2799         /*
2800          * Allocate RX ring hardware descriptors. A memzone large enough to
2801          * handle the maximum ring size is allocated in order to allow for
2802          * resizing in later calls to the queue setup function.
2803          */
2804         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2805                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2806         if (rz == NULL) {
2807                 ixgbe_rx_queue_release(rxq);
2808                 return -ENOMEM;
2809         }
2810
2811         /*
2812          * Zero init all the descriptors in the ring.
2813          */
2814         memset(rz->addr, 0, RX_RING_SZ);
2815
2816         /*
2817          * Set the RX tail/head register addresses: VFRDT/VFRDH for a virtual function
2818          */
2819         if (hw->mac.type == ixgbe_mac_82599_vf ||
2820             hw->mac.type == ixgbe_mac_X540_vf ||
2821             hw->mac.type == ixgbe_mac_X550_vf ||
2822             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2823             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2824                 rxq->rdt_reg_addr =
2825                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2826                 rxq->rdh_reg_addr =
2827                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2828         } else {
2829                 rxq->rdt_reg_addr =
2830                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2831                 rxq->rdh_reg_addr =
2832                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2833         }
2834
2835         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2836         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2837
2838         /*
2839          * Certain constraints must be met in order to use the bulk buffer
2840          * allocation Rx burst function. If any Rx queue doesn't meet them,
2841          * the feature is disabled for the whole port.
2842          */
2843         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2844                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2845                                     "preconditions - canceling the feature for "
2846                                     "the whole port[%d]",
2847                              rxq->queue_id, rxq->port_id);
2848                 adapter->rx_bulk_alloc_allowed = false;
2849         }
2850
2851         /*
2852          * Allocate software ring. Allow for space at the end of the
2853          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2854          * function does not access an invalid memory region.
2855          */
2856         len = nb_desc;
2857         if (adapter->rx_bulk_alloc_allowed)
2858                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2859
2860         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2861                                           sizeof(struct ixgbe_rx_entry) * len,
2862                                           RTE_CACHE_LINE_SIZE, socket_id);
2863         if (!rxq->sw_ring) {
2864                 ixgbe_rx_queue_release(rxq);
2865                 return -ENOMEM;
2866         }
2867
2868         /*
2869          * Always allocate even if it's not going to be needed in order to
2870          * simplify the code.
2871          *
2872          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2873          * be requested in ixgbe_dev_rx_init(), which is called later from
2874          * dev_start() flow.
2875          */
2876         rxq->sw_sc_ring =
2877                 rte_zmalloc_socket("rxq->sw_sc_ring",
2878                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2879                                    RTE_CACHE_LINE_SIZE, socket_id);
2880         if (!rxq->sw_sc_ring) {
2881                 ixgbe_rx_queue_release(rxq);
2882                 return -ENOMEM;
2883         }
2884
2885         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2886                             "dma_addr=0x%"PRIx64,
2887                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2888                      rxq->rx_ring_phys_addr);
2889
2890         if (!rte_is_power_of_2(nb_desc)) {
2891                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2892                                     "preconditions - canceling the feature for "
2893                                     "the whole port[%d]",
2894                              rxq->queue_id, rxq->port_id);
2895                 adapter->rx_vec_allowed = false;
2896         } else
2897                 ixgbe_rxq_vec_setup(rxq);
2898
2899         dev->data->rx_queues[queue_idx] = rxq;
2900
2901         ixgbe_reset_rx_queue(adapter, rxq);
2902
2903         return 0;
2904 }
2905
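/*
 * Return an estimate of the number of completed (DD-bit set) descriptors on
 * the given Rx queue.  The ring is scanned in steps of
 * IXGBE_RXQ_SCAN_INTERVAL descriptors, so the result is an approximation at
 * that granularity.
 */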
2906 uint32_t
2907 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2908 {
2909 #define IXGBE_RXQ_SCAN_INTERVAL 4
2910         volatile union ixgbe_adv_rx_desc *rxdp;
2911         struct ixgbe_rx_queue *rxq;
2912         uint32_t desc = 0;
2913
2914         if (rx_queue_id >= dev->data->nb_rx_queues) {
2915                 PMD_RX_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
2916                 return 0;
2917         }
2918
2919         rxq = dev->data->rx_queues[rx_queue_id];
2920         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2921
2922         while ((desc < rxq->nb_rx_desc) &&
2923                 (rxdp->wb.upper.status_error &
2924                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2925                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2926                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2927                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2928                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2929                                 desc - rxq->nb_rx_desc]);
2930         }
2931
2932         return desc;
2933 }
2934
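/*
 * Report whether the descriptor at position rx_tail + offset (modulo the
 * ring size) has been written back by the hardware, i.e. has its DD
 * (descriptor done) bit set.
 */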
2935 int
2936 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2937 {
2938         volatile union ixgbe_adv_rx_desc *rxdp;
2939         struct ixgbe_rx_queue *rxq = rx_queue;
2940         uint32_t desc;
2941
2942         if (unlikely(offset >= rxq->nb_rx_desc))
2943                 return 0;
2944         desc = rxq->rx_tail + offset;
2945         if (desc >= rxq->nb_rx_desc)
2946                 desc -= rxq->nb_rx_desc;
2947
2948         rxdp = &rxq->rx_ring[desc];
2949         return !!(rxdp->wb.upper.status_error &
2950                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2951 }
2952
2953 void __attribute__((cold))
2954 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
2955 {
2956         unsigned i;
2957         struct ixgbe_adapter *adapter =
2958                 (struct ixgbe_adapter *)dev->data->dev_private;
2959
2960         PMD_INIT_FUNC_TRACE();
2961
2962         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2963                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
2964
2965                 if (txq != NULL) {
2966                         txq->ops->release_mbufs(txq);
2967                         txq->ops->reset(txq);
2968                 }
2969         }
2970
2971         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2972                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
2973
2974                 if (rxq != NULL) {
2975                         ixgbe_rx_queue_release_mbufs(rxq);
2976                         ixgbe_reset_rx_queue(adapter, rxq);
2977                 }
2978         }
2979 }
2980
2981 void
2982 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
2983 {
2984         unsigned i;
2985
2986         PMD_INIT_FUNC_TRACE();
2987
2988         for (i = 0; i < dev->data->nb_rx_queues; i++) {
2989                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
2990                 dev->data->rx_queues[i] = NULL;
2991         }
2992         dev->data->nb_rx_queues = 0;
2993
2994         for (i = 0; i < dev->data->nb_tx_queues; i++) {
2995                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
2996                 dev->data->tx_queues[i] = NULL;
2997         }
2998         dev->data->nb_tx_queues = 0;
2999 }
3000
3001 /*********************************************************************
3002  *
3003  *  Device RX/TX init functions
3004  *
3005  **********************************************************************/
3006
3007 /**
3008  * Receive Side Scaling (RSS)
3009  * See section 7.1.2.8 in the following document:
3010  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3011  *
3012  * Principles:
3013  * The source and destination IP addresses of the IP header and the source
3014  * and destination ports of TCP/UDP headers, if any, of received packets are
3015  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3016  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3017  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3018  * RSS output index which is used as the index of the RX queue in which to
3019  * store the received packet.
3020  * The following output is supplied in the RX write-back descriptor:
3021  *     - 32-bit result of the Microsoft RSS hash function,
3022  *     - 4-bit RSS type field.
3023  */
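/*
 * Illustrative sketch of the lookup described above (not driver code; the
 * hardware performs it):
 *
 *     uint32_t rss_hash;                       // 32-bit Microsoft RSS hash
 *     uint8_t reta_idx = rss_hash & 0x7F;      // 7 LSBs -> 128-entry RETA
 *     uint8_t rx_queue = reta[reta_idx];       // RETA entry selects Rx queue
 *
 * "reta" is a placeholder for the redirection table configured below.
 */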
3024
3025 /*
3026  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3027  * Used as the default key.
3028  */
3029 static uint8_t rss_intel_key[40] = {
3030         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3031         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3032         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3033         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3034         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3035 };
3036
3037 static void
3038 ixgbe_rss_disable(struct rte_eth_dev *dev)
3039 {
3040         struct ixgbe_hw *hw;
3041         uint32_t mrqc;
3042         uint32_t mrqc_reg;
3043
3044         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3045         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3046         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3047         mrqc &= ~IXGBE_MRQC_RSSEN;
3048         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3049 }
3050
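/*
 * Program the RSS hash key (if one is supplied) and the set of enabled hash
 * functions into the RSSRK and MRQC registers.  The 40-byte key is written
 * as ten 32-bit words, least-significant byte first.
 */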
3051 static void
3052 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3053 {
3054         uint8_t  *hash_key;
3055         uint32_t mrqc;
3056         uint32_t rss_key;
3057         uint64_t rss_hf;
3058         uint16_t i;
3059         uint32_t mrqc_reg;
3060         uint32_t rssrk_reg;
3061
3062         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3063         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3064
3065         hash_key = rss_conf->rss_key;
3066         if (hash_key != NULL) {
3067                 /* Fill in RSS hash key */
3068                 for (i = 0; i < 10; i++) {
3069                         rss_key  = hash_key[(i * 4)];
3070                         rss_key |= hash_key[(i * 4) + 1] << 8;
3071                         rss_key |= hash_key[(i * 4) + 2] << 16;
3072                         rss_key |= hash_key[(i * 4) + 3] << 24;
3073                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3074                 }
3075         }
3076
3077         /* Set configured hashing protocols in MRQC register */
3078         rss_hf = rss_conf->rss_hf;
3079         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3080         if (rss_hf & ETH_RSS_IPV4)
3081                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3082         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3083                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3084         if (rss_hf & ETH_RSS_IPV6)
3085                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3086         if (rss_hf & ETH_RSS_IPV6_EX)
3087                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3088         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3089                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3090         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3091                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3092         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3093                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3094         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3095                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3096         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3097                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3098         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3099 }
3100
3101 int
3102 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3103                           struct rte_eth_rss_conf *rss_conf)
3104 {
3105         struct ixgbe_hw *hw;
3106         uint32_t mrqc;
3107         uint64_t rss_hf;
3108         uint32_t mrqc_reg;
3109
3110         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3111
3112         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3113                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3114                         "NIC.");
3115                 return -ENOTSUP;
3116         }
3117         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3118
3119         /*
3120          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3121          *     "RSS enabling cannot be done dynamically while it must be
3122          *      preceded by a software reset"
3123          * Before changing anything, first check that the update RSS operation
3124          * does not attempt to disable RSS, if RSS was enabled at
3125          * initialization time, or does not attempt to enable RSS, if RSS was
3126          * disabled at initialization time.
3127          */
3128         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3129         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3130         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3131                 if (rss_hf != 0) /* Enable RSS */
3132                         return -(EINVAL);
3133                 return 0; /* Nothing to do */
3134         }
3135         /* RSS enabled */
3136         if (rss_hf == 0) /* Disable RSS */
3137                 return -(EINVAL);
3138         ixgbe_hw_rss_hash_set(hw, rss_conf);
3139         return 0;
3140 }
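/*
 * Usage sketch (illustrative, values are assumptions): re-configure the hash
 * functions at runtime while keeping the current key, e.g.
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,        // NULL: leave the programmed key as-is
 *             .rss_hf  = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * which ends up in ixgbe_dev_rss_hash_update() above; the call fails with
 * -EINVAL if it would toggle RSS on or off relative to initialization time.
 */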
3141
3142 int
3143 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3144                             struct rte_eth_rss_conf *rss_conf)
3145 {
3146         struct ixgbe_hw *hw;
3147         uint8_t *hash_key;
3148         uint32_t mrqc;
3149         uint32_t rss_key;
3150         uint64_t rss_hf;
3151         uint16_t i;
3152         uint32_t mrqc_reg;
3153         uint32_t rssrk_reg;
3154
3155         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3156         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3157         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3158         hash_key = rss_conf->rss_key;
3159         if (hash_key != NULL) {
3160                 /* Return RSS hash key */
3161                 for (i = 0; i < 10; i++) {
3162                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3163                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3164                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3165                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3166                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3167                 }
3168         }
3169
3170         /* Get RSS functions configured in MRQC register */
3171         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3172         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3173                 rss_conf->rss_hf = 0;
3174                 return 0;
3175         }
3176         rss_hf = 0;
3177         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3178                 rss_hf |= ETH_RSS_IPV4;
3179         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3180                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3181         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3182                 rss_hf |= ETH_RSS_IPV6;
3183         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3184                 rss_hf |= ETH_RSS_IPV6_EX;
3185         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3186                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3187         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3188                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3189         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3190                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3191         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3192                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3193         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3194                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3195         rss_conf->rss_hf = rss_hf;
3196         return 0;
3197 }
3198
3199 static void
3200 ixgbe_rss_configure(struct rte_eth_dev *dev)
3201 {
3202         struct rte_eth_rss_conf rss_conf;
3203         struct ixgbe_hw *hw;
3204         uint32_t reta;
3205         uint16_t i;
3206         uint16_t j;
3207         uint16_t sp_reta_size;
3208         uint32_t reta_reg;
3209
3210         PMD_INIT_FUNC_TRACE();
3211         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3212
3213         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3214
3215         /*
3216          * Fill in redirection table
3217          * The byte-swap is needed because NIC registers are in
3218          * little-endian order.
3219          */
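        /*
         * Worked example (illustrative): with 4 Rx queues the first four
         * entries 0,1,2,3 accumulate in "reta" as 0x00010203, and every
         * fourth iteration the register is written as
         * rte_bswap32(0x00010203) == 0x03020100.
         */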
3220         reta = 0;
3221         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3222                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3223
3224                 if (j == dev->data->nb_rx_queues)
3225                         j = 0;
3226                 reta = (reta << 8) | j;
3227                 if ((i & 3) == 3)
3228                         IXGBE_WRITE_REG(hw, reta_reg,
3229                                         rte_bswap32(reta));
3230         }
3231
3232         /*
3233          * Configure the RSS key and the RSS protocols used to compute
3234          * the RSS hash of input packets.
3235          */
3236         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3237         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3238                 ixgbe_rss_disable(dev);
3239                 return;
3240         }
3241         if (rss_conf.rss_key == NULL)
3242                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3243         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3244 }
3245
3246 #define NUM_VFTA_REGISTERS 128
3247 #define NIC_RX_BUFFER_SIZE 0x200
3248 #define X550_RX_BUFFER_SIZE 0x180
3249
3250 static void
3251 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3252 {
3253         struct rte_eth_vmdq_dcb_conf *cfg;
3254         struct ixgbe_hw *hw;
3255         enum rte_eth_nb_pools num_pools;
3256         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3257         uint16_t pbsize;
3258         uint8_t nb_tcs; /* number of traffic classes */
3259         int i;
3260
3261         PMD_INIT_FUNC_TRACE();
3262         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3263         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3264         num_pools = cfg->nb_queue_pools;
3265         /* Check we have a valid number of pools */
3266         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3267                 ixgbe_rss_disable(dev);
3268                 return;
3269         }
3270         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3271         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3272
3273         /*
3274          * RXPBSIZE
3275          * split rx buffer up into sections, each for 1 traffic class
3276          */
3277         switch (hw->mac.type) {
3278         case ixgbe_mac_X550:
3279         case ixgbe_mac_X550EM_x:
3280         case ixgbe_mac_X550EM_a:
3281                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3282                 break;
3283         default:
3284                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3285                 break;
3286         }
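        /*
         * Example (illustrative): on a non-X550 device with 16 pools, i.e.
         * 8 traffic classes, pbsize = 0x200 / 8 = 0x40, so each TC gets an
         * equal share of the Rx packet buffer programmed below.
         */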
3287         for (i = 0; i < nb_tcs; i++) {
3288                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3289
3290                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3291                 /* clear 10 bits. */
3292                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3293                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3294         }
3295         /* zero alloc all unused TCs */
3296         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3297                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3298
3299                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3300                 /* clear 10 bits. */
3301                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3302         }
3303
3304         /* MRQC: enable vmdq and dcb */
3305         mrqc = (num_pools == ETH_16_POOLS) ?
3306                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3307         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3308
3309         /* PFVTCTL: turn on virtualisation and set the default pool */
3310         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3311         if (cfg->enable_default_pool) {
3312                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3313         } else {
3314                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3315         }
3316
3317         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3318
3319         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3320         queue_mapping = 0;
3321         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3322                 /*
3323                  * mapping is done with 3 bits per priority,
3324                  * so shift by i*3 each time
3325                  */
3326                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3327
3328         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3329
3330         /* RTRPCS: DCB related */
3331         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3332
3333         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3334         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3335         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3336         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3337
3338         /* VFTA - enable all vlan filters */
3339         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3340                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3341         }
3342
3343         /* VFRE: pool enabling for receive - 16 or 32 */
3344         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3345                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3346
3347         /*
3348          * MPSAR - allow pools to read specific mac addresses
3349          * In this case, all pools should be able to read from mac addr 0
3350          */
3351         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3352         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3353
3354         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3355         for (i = 0; i < cfg->nb_pool_maps; i++) {
3356                 /* set vlan id in VF register and set the valid bit */
3357                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3358                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3359                 /*
3360                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3361                  * pools, we only need to use the first half of the register
3362                  * i.e. bits 0-31
3363                  */
3364                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3365         }
3366 }
3367
3368 /**
3369  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3370  * @dev: pointer to eth_dev structure
3371  * @dcb_config: pointer to ixgbe_dcb_config structure
3372  */
3373 static void
3374 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3375                        struct ixgbe_dcb_config *dcb_config)
3376 {
3377         uint32_t reg;
3378         uint32_t q;
3379         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3380
3381         PMD_INIT_FUNC_TRACE();
3382         if (hw->mac.type != ixgbe_mac_82598EB) {
3383                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3384                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3385                 reg |= IXGBE_RTTDCS_ARBDIS;
3386                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3387
3388                 /* Enable DCB for Tx with 8 TCs */
3389                 if (dcb_config->num_tcs.pg_tcs == 8) {
3390                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3391                 } else {
3392                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3393                 }
3394                 if (dcb_config->vt_mode)
3395                         reg |= IXGBE_MTQC_VT_ENA;
3396                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3397
3398                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3399                         /* Disable drop for all queues in VMDQ mode*/
3400                         for (q = 0; q < 128; q++)
3401                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3402                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3403                 } else {
3404                         /* Enable drop for all queues in SRIOV mode */
3405                         for (q = 0; q < 128; q++)
3406                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3407                                                 (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE));
3408                 }
3409
3410                 /* Enable the Tx desc arbiter */
3411                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3412                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3413                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3414
3415                 /* Enable Security TX Buffer IFG for DCB */
3416                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3417                 reg |= IXGBE_SECTX_DCB;
3418                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3419         }
3420 }
3421
3422 /**
3423  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3424  * @dev: pointer to rte_eth_dev structure
3425  * @dcb_config: pointer to ixgbe_dcb_config structure
3426  */
3427 static void
3428 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3429                         struct ixgbe_dcb_config *dcb_config)
3430 {
3431         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3432                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3433         struct ixgbe_hw *hw =
3434                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3435
3436         PMD_INIT_FUNC_TRACE();
3437         if (hw->mac.type != ixgbe_mac_82598EB)
3438                 /*PF VF Transmit Enable*/
3439                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3440                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3441
3442         /*Configure general DCB TX parameters*/
3443         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3444 }
3445
3446 static void
3447 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3448                         struct ixgbe_dcb_config *dcb_config)
3449 {
3450         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3451                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3452         struct ixgbe_dcb_tc_config *tc;
3453         uint8_t i, j;
3454
3455         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3456         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3457                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3458                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3459         } else {
3460                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3461                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3462         }
3463         /* User Priority to Traffic Class mapping */
3464         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3465                 j = vmdq_rx_conf->dcb_tc[i];
3466                 tc = &dcb_config->tc_config[j];
3467                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3468                                                 (uint8_t)(1 << j);
3469         }
3470 }
3471
3472 static void
3473 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3474                         struct ixgbe_dcb_config *dcb_config)
3475 {
3476         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3477                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3478         struct ixgbe_dcb_tc_config *tc;
3479         uint8_t i, j;
3480
3481         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3482         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3483                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3484                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3485         } else {
3486                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3487                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3488         }
3489
3490         /* User Priority to Traffic Class mapping */
3491         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3492                 j = vmdq_tx_conf->dcb_tc[i];
3493                 tc = &dcb_config->tc_config[j];
3494                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3495                                                 (uint8_t)(1 << j);
3496         }
3497 }
3498
3499 static void
3500 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3501                 struct ixgbe_dcb_config *dcb_config)
3502 {
3503         struct rte_eth_dcb_rx_conf *rx_conf =
3504                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3505         struct ixgbe_dcb_tc_config *tc;
3506         uint8_t i, j;
3507
3508         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3509         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3510
3511         /* User Priority to Traffic Class mapping */
3512         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3513                 j = rx_conf->dcb_tc[i];
3514                 tc = &dcb_config->tc_config[j];
3515                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3516                                                 (uint8_t)(1 << j);
3517         }
3518 }
3519
3520 static void
3521 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3522                 struct ixgbe_dcb_config *dcb_config)
3523 {
3524         struct rte_eth_dcb_tx_conf *tx_conf =
3525                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3526         struct ixgbe_dcb_tc_config *tc;
3527         uint8_t i, j;
3528
3529         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3530         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3531
3532         /* User Priority to Traffic Class mapping */
3533         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3534                 j = tx_conf->dcb_tc[i];
3535                 tc = &dcb_config->tc_config[j];
3536                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3537                                                 (uint8_t)(1 << j);
3538         }
3539 }
3540
3541 /**
3542  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3543  * @hw: pointer to hardware structure
3544  * @dcb_config: pointer to ixgbe_dcb_config structure
3545  */
3546 static void
3547 ixgbe_dcb_rx_hw_config(struct ixgbe_hw *hw,
3548                struct ixgbe_dcb_config *dcb_config)
3549 {
3550         uint32_t reg;
3551         uint32_t vlanctrl;
3552         uint8_t i;
3553
3554         PMD_INIT_FUNC_TRACE();
3555         /*
3556          * Disable the arbiter before changing parameters
3557          * (always enable recycle mode; WSP)
3558          */
3559         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3560         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3561
3562         if (hw->mac.type != ixgbe_mac_82598EB) {
3563                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3564                 if (dcb_config->num_tcs.pg_tcs == 4) {
3565                         if (dcb_config->vt_mode)
3566                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3567                                         IXGBE_MRQC_VMDQRT4TCEN;
3568                         else {
3569                                 /* Whether the mode is DCB or DCB_RSS, just
3570                                  * set MRQE to RSSXTCEN; RSS itself is
3571                                  * controlled by the RSS_FIELD bits
3572                                  */
3573                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3574                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3575                                         IXGBE_MRQC_RTRSS4TCEN;
3576                         }
3577                 }
3578                 if (dcb_config->num_tcs.pg_tcs == 8) {
3579                         if (dcb_config->vt_mode)
3580                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3581                                         IXGBE_MRQC_VMDQRT8TCEN;
3582                         else {
3583                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3584                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3585                                         IXGBE_MRQC_RTRSS8TCEN;
3586                         }
3587                 }
3588
3589                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3590         }
3591
3592         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3593         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3594         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3595         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3596
3597         /* VFTA - enable all vlan filters */
3598         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3599                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3600         }
3601
3602         /*
3603          * Configure Rx packet plane (recycle mode; WSP) and
3604          * enable arbiter
3605          */
3606         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3607         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3608 }
3609
3610 static void
3611 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3612                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3613 {
3614         switch (hw->mac.type) {
3615         case ixgbe_mac_82598EB:
3616                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3617                 break;
3618         case ixgbe_mac_82599EB:
3619         case ixgbe_mac_X540:
3620         case ixgbe_mac_X550:
3621         case ixgbe_mac_X550EM_x:
3622         case ixgbe_mac_X550EM_a:
3623                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3624                                                   tsa, map);
3625                 break;
3626         default:
3627                 break;
3628         }
3629 }
3630
3631 static void
3632 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3633                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3634 {
3635         switch (hw->mac.type) {
3636         case ixgbe_mac_82598EB:
3637                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3638                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3639                 break;
3640         case ixgbe_mac_82599EB:
3641         case ixgbe_mac_X540:
3642         case ixgbe_mac_X550:
3643         case ixgbe_mac_X550EM_x:
3644         case ixgbe_mac_X550EM_a:
3645                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3646                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3647                 break;
3648         default:
3649                 break;
3650         }
3651 }
3652
3653 #define DCB_RX_CONFIG  1
3654 #define DCB_TX_CONFIG  1
3655 #define DCB_TX_PB      1024
3656 /**
3657  * ixgbe_dcb_hw_configure - Enable DCB and configure
3658  * general DCB in VT mode and non-VT mode parameters
3659  * @dev: pointer to rte_eth_dev structure
3660  * @dcb_config: pointer to ixgbe_dcb_config structure
3661  */
3662 static int
3663 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3664                         struct ixgbe_dcb_config *dcb_config)
3665 {
3666         int     ret = 0;
3667         uint8_t i, pfc_en, nb_tcs;
3668         uint16_t pbsize, rx_buffer_size;
3669         uint8_t config_dcb_rx = 0;
3670         uint8_t config_dcb_tx = 0;
3671         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3672         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3673         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3674         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3675         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3676         struct ixgbe_dcb_tc_config *tc;
3677         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3678         struct ixgbe_hw *hw =
3679                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3680
3681         switch (dev->data->dev_conf.rxmode.mq_mode) {
3682         case ETH_MQ_RX_VMDQ_DCB:
3683                 dcb_config->vt_mode = true;
3684                 if (hw->mac.type != ixgbe_mac_82598EB) {
3685                         config_dcb_rx = DCB_RX_CONFIG;
3686                         /*
3687                          * get DCB and VT RX configuration parameters
3688                          * from rte_eth_conf
3689                          */
3690                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3691                         /*Configure general VMDQ and DCB RX parameters*/
3692                         ixgbe_vmdq_dcb_configure(dev);
3693                 }
3694                 break;
3695         case ETH_MQ_RX_DCB:
3696         case ETH_MQ_RX_DCB_RSS:
3697                 dcb_config->vt_mode = false;
3698                 config_dcb_rx = DCB_RX_CONFIG;
3699                 /* Get DCB RX configuration parameters from rte_eth_conf */
3700                 ixgbe_dcb_rx_config(dev, dcb_config);
3701                 /* Configure general DCB RX parameters */
3702                 ixgbe_dcb_rx_hw_config(hw, dcb_config);
3703                 break;
3704         default:
3705                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3706                 break;
3707         }
3708         switch (dev->data->dev_conf.txmode.mq_mode) {
3709         case ETH_MQ_TX_VMDQ_DCB:
3710                 dcb_config->vt_mode = true;
3711                 config_dcb_tx = DCB_TX_CONFIG;
3712                 /* get DCB and VT TX configuration parameters
3713                  * from rte_eth_conf
3714                  */
3715                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3716                 /* Configure general VMDQ and DCB TX parameters */
3717                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3718                 break;
3719
3720         case ETH_MQ_TX_DCB:
3721                 dcb_config->vt_mode = false;
3722                 config_dcb_tx = DCB_TX_CONFIG;
3723                 /* Get DCB TX configuration parameters from rte_eth_conf */
3724                 ixgbe_dcb_tx_config(dev, dcb_config);
3725                 /* Configure general DCB TX parameters */
3726                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3727                 break;
3728         default:
3729                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3730                 break;
3731         }
3732
3733         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3734         /* Unpack map */
3735         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3736         if (nb_tcs == ETH_4_TCS) {
3737                 /* Avoid un-configured priority mapping to TC0 */
3738                 uint8_t j = 4;
3739                 uint8_t mask = 0xFF;
3740
3741                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3742                         mask = (uint8_t)(mask & (~(1 << map[i])));
3743                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3744                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3745                                 map[j++] = i;
3746                         mask >>= 1;
3747                 }
3748                 /* Re-configure 4 TCs BW */
3749                 for (i = 0; i < nb_tcs; i++) {
3750                         tc = &dcb_config->tc_config[i];
3751                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3752                                                 (uint8_t)(100 / nb_tcs);
3753                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3754                                                 (uint8_t)(100 / nb_tcs);
3755                 }
3756                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3757                         tc = &dcb_config->tc_config[i];
3758                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3759                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3760                 }
3761         }
3762
3763         switch (hw->mac.type) {
3764         case ixgbe_mac_X550:
3765         case ixgbe_mac_X550EM_x:
3766         case ixgbe_mac_X550EM_a:
3767                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3768                 break;
3769         default:
3770                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3771                 break;
3772         }
3773
3774         if (config_dcb_rx) {
3775                 /* Set RX buffer size */
3776                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3777                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3778
3779                 for (i = 0; i < nb_tcs; i++) {
3780                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3781                 }
3782                 /* zero alloc all unused TCs */
3783                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3784                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3785                 }
3786         }
3787         if (config_dcb_tx) {
3788                 /* Only support an equally distributed
3789                  *  Tx packet buffer strategy.
3790                  */
3791                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3792                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3793
3794                 for (i = 0; i < nb_tcs; i++) {
3795                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3796                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3797                 }
3798                 /* Clear unused TCs, if any, to zero buffer size */
3799                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3800                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3801                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3802                 }
3803         }
3804
3805         /* Calculate traffic class credits */
3806         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3807                                 IXGBE_DCB_TX_CONFIG);
3808         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3809                                 IXGBE_DCB_RX_CONFIG);
3810
3811         if (config_dcb_rx) {
3812                 /* Unpack CEE standard containers */
3813                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3814                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3815                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3816                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3817                 /* Configure PG(ETS) RX */
3818                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3819         }
3820
3821         if (config_dcb_tx) {
3822                 /* Unpack CEE standard containers */
3823                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3824                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3825                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3826                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3827                 /* Configure PG(ETS) TX */
3828                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3829         }
3830
3831         /* Configure queue statistics registers */
3832         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3833
3834         /* Check if the PFC is supported */
3835         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3836                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3837                 for (i = 0; i < nb_tcs; i++) {
3838                         /*
3839                          * If the TC count is 8, the default high_water is 48
3840                          * and the default low_water is 16.
3841                          */
3842                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3843                         hw->fc.low_water[i] = pbsize / 4;
3844                         /* Enable pfc for this TC */
3845                         tc = &dcb_config->tc_config[i];
3846                         tc->pfc = ixgbe_dcb_pfc_enabled;
3847                 }
3848                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3849                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3850                         pfc_en &= 0x0F;
3851                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3852         }
3853
3854         return ret;
3855 }
3856
3857 /**
3858  * ixgbe_configure_dcb - Configure DCB hardware
3859  * @dev: pointer to rte_eth_dev
3860  */
3861 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3862 {
3863         struct ixgbe_dcb_config *dcb_cfg =
3864                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3865         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3866
3867         PMD_INIT_FUNC_TRACE();
3868
3869         /* Check whether the mq_mode is supported for DCB */
3870         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3871             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3872             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3873                 return;
3874
3875         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3876                 return;
3877
3878         /* Configure DCB hardware */
3879         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3880 }
3881
3882 /*
3883  * VMDq is only supported on 10 GbE NICs.
3884  */
3885 static void
3886 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3887 {
3888         struct rte_eth_vmdq_rx_conf *cfg;
3889         struct ixgbe_hw *hw;
3890         enum rte_eth_nb_pools num_pools;
3891         uint32_t mrqc, vt_ctl, vlanctrl;
3892         uint32_t vmolr = 0;
3893         int i;
3894
3895         PMD_INIT_FUNC_TRACE();
3896         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3897         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3898         num_pools = cfg->nb_queue_pools;
3899
3900         ixgbe_rss_disable(dev);
3901
3902         /* MRQC: enable vmdq */
3903         mrqc = IXGBE_MRQC_VMDQEN;
3904         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3905
3906         /* PFVTCTL: turn on virtualisation and set the default pool */
3907         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3908         if (cfg->enable_default_pool)
3909                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3910         else
3911                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3912
3913         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3914
3915         for (i = 0; i < (int)num_pools; i++) {
3916                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3917                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3918         }
3919
3920         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3921         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3922         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3923         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3924
3925         /* VFTA - enable all vlan filters */
3926         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3927                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3928
3929         /* VFRE: pool enabling for receive - 64 */
3930         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3931         if (num_pools == ETH_64_POOLS)
3932                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3933
3934         /*
3935          * MPSAR - allow pools to read specific mac addresses
3936          * In this case, all pools should be able to read from mac addr 0
3937          */
3938         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3939         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3940
3941         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3942         for (i = 0; i < cfg->nb_pool_maps; i++) {
3943                 /* set vlan id in VF register and set the valid bit */
3944                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3945                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
3946                 /*
3947                  * Put the allowed pools in VFB reg. As we only have 16 or 64
3948                  * pools, we only need to use the first half of the register
3949                  * i.e. bits 0-31
3950                  */
3951                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
3952                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
3953                                         (cfg->pool_map[i].pools & UINT32_MAX));
3954                 else
3955                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
3956                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
3957
3958         }
3959
3960         /* PFDMA Tx General Switch Control: enables VMDq loopback */
3961         if (cfg->enable_loop_back) {
3962                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
3963                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
3964                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
3965         }
3966
3967         IXGBE_WRITE_FLUSH(hw);
3968 }
3969
3970 /*
3971  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
3972  * @hw: pointer to hardware structure
3973  */
3974 static void
3975 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
3976 {
3977         uint32_t reg;
3978         uint32_t q;
3979
3980         PMD_INIT_FUNC_TRACE();
3981         /* PF VF Transmit Enable */
3982         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
3983         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
3984
3985         /* Disable the Tx desc arbiter so that MTQC can be changed */
3986         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3987         reg |= IXGBE_RTTDCS_ARBDIS;
3988         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3989
3990         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
3991         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3992
3993         /* Disable drop for all queues */
3994         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3995                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3996                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
3997
3998         /* Enable the Tx desc arbiter */
3999         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4000         reg &= ~IXGBE_RTTDCS_ARBDIS;
4001         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4002
4003         IXGBE_WRITE_FLUSH(hw);
4004 }
4005
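/*
 * ixgbe_alloc_rx_queue_mbufs - Allocate one mbuf per Rx descriptor from the
 * queue's mempool, initialize the mbuf fields and program the descriptor
 * buffer addresses; returns -ENOMEM if an allocation fails.
 */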
4006 static int __attribute__((cold))
4007 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4008 {
4009         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4010         uint64_t dma_addr;
4011         unsigned int i;
4012
4013         /* Initialize software ring entries */
4014         for (i = 0; i < rxq->nb_rx_desc; i++) {
4015                 volatile union ixgbe_adv_rx_desc *rxd;
4016                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4017
4018                 if (mbuf == NULL) {
4019                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4020                                      (unsigned) rxq->queue_id);
4021                         return -ENOMEM;
4022                 }
4023
4024                 rte_mbuf_refcnt_set(mbuf, 1);
4025                 mbuf->next = NULL;
4026                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4027                 mbuf->nb_segs = 1;
4028                 mbuf->port = rxq->port_id;
4029
4030                 dma_addr =
4031                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4032                 rxd = &rxq->rx_ring[i];
4033                 rxd->read.hdr_addr = 0;
4034                 rxd->read.pkt_addr = dma_addr;
4035                 rxe[i].mbuf = mbuf;
4036         }
4037
4038         return 0;
4039 }
4040
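/*
 * ixgbe_config_vf_rss - Enable RSS together with VMDq while SR-IOV is active:
 * program the RSS hash registers (via ixgbe_rss_configure) and select the
 * 32- or 64-pool VMDq+RSS mode in MRQC according to the number of active
 * pools.
 */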
4041 static int
4042 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4043 {
4044         struct ixgbe_hw *hw;
4045         uint32_t mrqc;
4046
4047         ixgbe_rss_configure(dev);
4048
4049         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4050
4051         /* MRQC: enable VF RSS */
4052         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4053         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4054         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4055         case ETH_64_POOLS:
4056                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4057                 break;
4058
4059         case ETH_32_POOLS:
4060                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4061                 break;
4062
4063         default:
4064                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4065                 return -EINVAL;
4066         }
4067
4068         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4069
4070         return 0;
4071 }
4072
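/*
 * ixgbe_config_vf_default - Fallback MRQC programming when SR-IOV is active
 * and no RSS mode is requested: select the VMDq (64 pools) or VMDq+DCB
 * (32/16 pools) mode matching the number of active pools.
 */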
4073 static int
4074 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4075 {
4076         struct ixgbe_hw *hw =
4077                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4078
4079         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4080         case ETH_64_POOLS:
4081                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4082                         IXGBE_MRQC_VMDQEN);
4083                 break;
4084
4085         case ETH_32_POOLS:
4086                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4087                         IXGBE_MRQC_VMDQRT4TCEN);
4088                 break;
4089
4090         case ETH_16_POOLS:
4091                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4092                         IXGBE_MRQC_VMDQRT8TCEN);
4093                 break;
4094         default:
4095                 PMD_INIT_LOG(ERR,
4096                         "invalid pool number in IOV mode");
4097                 break;
4098         }
4099         return 0;
4100 }
4101
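/*
 * ixgbe_dev_mq_rx_configure - Program the Rx multi-queue mode (RSS, VMDq
 * and/or DCB) according to rxmode.mq_mode, handling the SR-IOV-inactive and
 * SR-IOV-active cases separately; nothing to do on 82598.
 */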
4102 static int
4103 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4104 {
4105         struct ixgbe_hw *hw =
4106                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4107
4108         if (hw->mac.type == ixgbe_mac_82598EB)
4109                 return 0;
4110
4111         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4112                 /*
4113                  * SRIOV inactive scheme
4114                  * any DCB/RSS w/o VMDq multi-queue setting
4115                  */
4116                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4117                 case ETH_MQ_RX_RSS:
4118                 case ETH_MQ_RX_DCB_RSS:
4119                 case ETH_MQ_RX_VMDQ_RSS:
4120                         ixgbe_rss_configure(dev);
4121                         break;
4122
4123                 case ETH_MQ_RX_VMDQ_DCB:
4124                         ixgbe_vmdq_dcb_configure(dev);
4125                         break;
4126
4127                 case ETH_MQ_RX_VMDQ_ONLY:
4128                         ixgbe_vmdq_rx_hw_configure(dev);
4129                         break;
4130
4131                 case ETH_MQ_RX_NONE:
4132                 default:
4133                         /* If mq_mode is none, disable RSS. */
4134                         ixgbe_rss_disable(dev);
4135                         break;
4136                 }
4137         } else {
4138                 /*
4139                  * SRIOV active scheme
4140                  * Support RSS together with VMDq & SRIOV
4141                  */
4142                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4143                 case ETH_MQ_RX_RSS:
4144                 case ETH_MQ_RX_VMDQ_RSS:
4145                         ixgbe_config_vf_rss(dev);
4146                         break;
4147                 case ETH_MQ_RX_VMDQ_DCB:
4148                         ixgbe_vmdq_dcb_configure(dev);
4149                         break;
4150                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4151                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4152                         PMD_INIT_LOG(ERR,
4153                                 "DCB/RSS is not supported with VMDq & SRIOV");
4154                         return -1;
4155                 default:
4156                         ixgbe_config_vf_default(dev);
4157                         break;
4158                 }
4159         }
4160
4161         return 0;
4162 }
4163
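/*
 * ixgbe_dev_mq_tx_configure - Program the Tx multi-queue mode: disable the
 * Tx descriptor arbiter, set MTQC for the VMDq-only or SR-IOV pool layout,
 * then re-enable the arbiter; nothing to do on 82598.
 */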
4164 static int
4165 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4166 {
4167         struct ixgbe_hw *hw =
4168                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4169         uint32_t mtqc;
4170         uint32_t rttdcs;
4171
4172         if (hw->mac.type == ixgbe_mac_82598EB)
4173                 return 0;
4174
4175         /* disable arbiter before setting MTQC */
4176         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4177         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4178         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4179
4180         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4181                 /*
4182                  * SRIOV inactive scheme
4183                  * any DCB w/o VMDq multi-queue setting
4184                  */
4185                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4186                         ixgbe_vmdq_tx_hw_configure(hw);
4187                 else {
4188                         mtqc = IXGBE_MTQC_64Q_1PB;
4189                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4190                 }
4191         } else {
4192                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4193
4194                 /*
4195                  * SRIOV active scheme
4196                  * FIXME if support DCB together with VMDq & SRIOV
4197                  */
4198                 case ETH_64_POOLS:
4199                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4200                         break;
4201                 case ETH_32_POOLS:
4202                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4203                         break;
4204                 case ETH_16_POOLS:
4205                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4206                                 IXGBE_MTQC_8TC_8TQ;
4207                         break;
4208                 default:
4209                         mtqc = IXGBE_MTQC_64Q_1PB;
4210                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4211                 }
4212                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4213         }
4214
4215         /* re-enable arbiter */
4216         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4217         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4218
4219         return 0;
4220 }
4221
4222 /**
4223  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4224  *
4225  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4226  * spec rev. 3.0 chapter 8.2.3.8.13.
4227  *
4228  * @pool Memory pool of the Rx queue
4229  */
4230 static inline uint32_t
4231 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4232 {
4233         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4234
4235         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4236         uint16_t maxdesc =
4237                 IPV4_MAX_PKT_LEN /
4238                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4239
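        /*
         * For example, assuming the common 2 KB mbuf data room (2176-byte
         * buffers with a 128-byte headroom): 65535 / 2048 = 31, so the
         * MAXDESC value is capped at IXGBE_RSCCTL_MAXDESC_16 below.
         */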
4240         if (maxdesc >= 16)
4241                 return IXGBE_RSCCTL_MAXDESC_16;
4242         else if (maxdesc >= 8)
4243                 return IXGBE_RSCCTL_MAXDESC_8;
4244         else if (maxdesc >= 4)
4245                 return IXGBE_RSCCTL_MAXDESC_4;
4246         else
4247                 return IXGBE_RSCCTL_MAXDESC_1;
4248 }
4249
4250 /**
4251  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4252  * interrupt
4253  *
4254  * (Taken from FreeBSD tree)
4255  * (yes this is all very magic and confusing :)
4256  *
4257  * @dev port handle
4258  * @entry the register array entry
4259  * @vector the MSIX vector for this queue
4260  * @type RX/TX/MISC
4261  */
4262 static void
4263 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4264 {
4265         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4266         u32 ivar, index;
4267
4268         vector |= IXGBE_IVAR_ALLOC_VAL;
4269
4270         switch (hw->mac.type) {
4271
4272         case ixgbe_mac_82598EB:
4273                 if (type == -1)
4274                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4275                 else
4276                         entry += (type * 64);
4277                 index = (entry >> 2) & 0x1F;
4278                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4279                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4280                 ivar |= (vector << (8 * (entry & 0x3)));
4281                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4282                 break;
4283
4284         case ixgbe_mac_82599EB:
4285         case ixgbe_mac_X540:
4286                 if (type == -1) { /* MISC IVAR */
4287                         index = (entry & 1) * 8;
4288                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4289                         ivar &= ~(0xFF << index);
4290                         ivar |= (vector << index);
4291                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4292                 } else {        /* RX/TX IVARS */
4293                         index = (16 * (entry & 1)) + (8 * type);
4294                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4295                         ivar &= ~(0xFF << index);
4296                         ivar |= (vector << index);
4297                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4298                 }
4299
4300                 break;
4301
4302         default:
4303                 break;
4304         }
4305 }
4306
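/*
 * ixgbe_set_rx_function - Select the Rx burst callback for the port: vector,
 * bulk-allocation, LRO or plain single-allocation receive, depending on the
 * port configuration and the preconditions gathered during queue setup.
 */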
4307 void __attribute__((cold))
4308 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4309 {
4310         uint16_t i, rx_using_sse;
4311         struct ixgbe_adapter *adapter =
4312                 (struct ixgbe_adapter *)dev->data->dev_private;
4313
4314         /*
4315          * In order to allow Vector Rx there are a few configuration
4316          * conditions to be met and Rx Bulk Allocation should be allowed.
4317          */
4318         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4319             !adapter->rx_bulk_alloc_allowed) {
4320                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4321                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4322                                     "not enabled",
4323                              dev->data->port_id);
4324
4325                 adapter->rx_vec_allowed = false;
4326         }
4327
4328         /*
4329          * Initialize the appropriate LRO callback.
4330          *
4331          * If all queues satisfy the bulk allocation preconditions
4332          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4333          * Otherwise use a single allocation version.
4334          */
4335         if (dev->data->lro) {
4336                 if (adapter->rx_bulk_alloc_allowed) {
4337                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4338                                            "allocation version");
4339                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4340                 } else {
4341                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4342                                            "allocation version");
4343                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4344                 }
4345         } else if (dev->data->scattered_rx) {
4346                 /*
4347                  * Set the non-LRO scattered callback: there are Vector and
4348                  * single allocation versions.
4349                  */
4350                 if (adapter->rx_vec_allowed) {
4351                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4352                                             "callback (port=%d).",
4353                                      dev->data->port_id);
4354
4355                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4356                 } else if (adapter->rx_bulk_alloc_allowed) {
4357                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4358                                            "allocation callback (port=%d).",
4359                                      dev->data->port_id);
4360                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4361                 } else {
4362                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4363                                             "single allocation) "
4364                                             "Scattered Rx callback "
4365                                             "(port=%d).",
4366                                      dev->data->port_id);
4367
4368                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4369                 }
4370         /*
4371          * Below we set "simple" callbacks according to port/queues parameters.
4372          * If parameters allow we are going to choose between the following
4373          * callbacks:
4374          *    - Vector
4375          *    - Bulk Allocation
4376          *    - Single buffer allocation (the simplest one)
4377          */
4378         } else if (adapter->rx_vec_allowed) {
4379                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4380                                     "burst size no less than %d (port=%d).",
4381                              RTE_IXGBE_DESCS_PER_LOOP,
4382                              dev->data->port_id);
4383
4384                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4385         } else if (adapter->rx_bulk_alloc_allowed) {
4386                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4387                                     "satisfied. Rx Burst Bulk Alloc function "
4388                                     "will be used on port=%d.",
4389                              dev->data->port_id);
4390
4391                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4392         } else {
4393                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4394                                     "satisfied, or Scattered Rx is requested "
4395                                     "(port=%d).",
4396                              dev->data->port_id);
4397
4398                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4399         }
4400
4401         /* Propagate information about RX function choice through all queues. */
4402
4403         rx_using_sse =
4404                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4405                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4406
4407         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4408                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4409
4410                 rxq->rx_using_sse = rx_using_sse;
4411         }
4412 }
4413
4414 /**
4415  * ixgbe_set_rsc - configure RSC related port HW registers
4416  *
4417  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4418  * of 82599 Spec (x540 configuration is virtually the same).
4419  *
4420  * @dev port handle
4421  *
4422  * Returns 0 in case of success or a non-zero error code
4423  */
4424 static int
4425 ixgbe_set_rsc(struct rte_eth_dev *dev)
4426 {
4427         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4428         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4429         struct rte_eth_dev_info dev_info = { 0 };
4430         bool rsc_capable = false;
4431         uint16_t i;
4432         uint32_t rdrxctl;
4433
4434         /* Sanity check */
4435         dev->dev_ops->dev_infos_get(dev, &dev_info);
4436         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4437                 rsc_capable = true;
4438
4439         if (!rsc_capable && rx_conf->enable_lro) {
4440                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4441                                    "support it");
4442                 return -EINVAL;
4443         }
4444
4445         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4446
4447         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4448                 /*
4449                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4450                  * RSC configuration requires HW CRC stripping to be
4451                  * enabled. If the user requested both HW CRC stripping off
4452                  * and RSC on - return an error.
4453                  */
4454                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4455                                     "is disabled");
4456                 return -EINVAL;
4457         }
4458
4459         /* RFCTL configuration  */
4460         if (rsc_capable) {
4461                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4462
4463                 if (rx_conf->enable_lro)
4464                         /*
4465                          * Since NFS packet coalescing is not supported, clear
4466                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4467                          * enabled.
4468                          */
4469                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4470                                    IXGBE_RFCTL_NFSR_DIS);
4471                 else
4472                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4473
4474                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4475         }
4476
4477         /* If LRO hasn't been requested - we are done here. */
4478         if (!rx_conf->enable_lro)
4479                 return 0;
4480
4481         /* Set RDRXCTL.RSCACKC bit */
4482         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4483         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4484         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4485
4486         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4487         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4488                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4489                 uint32_t srrctl =
4490                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4491                 uint32_t rscctl =
4492                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4493                 uint32_t psrtype =
4494                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4495                 uint32_t eitr =
4496                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4497
4498                 /*
4499                  * ixgbe PMD doesn't support header-split at the moment.
4500                  *
4501                  * Following chapter 4.6.7.2.1 of the 82599/x540 Spec,
4502                  * if RSC is enabled the SRRCTL[n].BSIZEHEADER should be
4503                  * configured even if header split is not enabled. We
4504                  * configure it to 128 bytes, following the
4505                  * recommendation in the spec.
4506                  */
4507                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4508                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4509                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4510
4511                 /*
4512                  * TODO: Consider setting the Receive Descriptor Minimum
4513                  * Threshold Size for an RSC case. This is not an obviously
4514                  * beneficial option but one worth considering...
4515                  */
4516
4517                 rscctl |= IXGBE_RSCCTL_RSCEN;
4518                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4519                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4520
4521                 /*
4522                  * RSC: Set ITR interval corresponding to 2K ints/s.
4523                  *
4524                  * Full-sized RSC aggregations for a 10Gb/s link will
4525                  * arrive at about a 20K aggregations/s rate.
4526                  *
4527                  * A 2K ints/s rate will cause only 10% of the
4528                  * aggregations to be closed due to the interrupt timer
4529                  * expiration when streaming at wire speed.
4530                  *
4531                  * For a sparse streaming case this setting will yield
4532                  * at most 500us latency for a single RSC aggregation.
4533                  */
4534                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4535                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4536
4537                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4538                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4539                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4540                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4541
4542                 /*
4543                  * RSC requires the mapping of the queue to the
4544                  * interrupt vector.
4545                  */
4546                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4547         }
4548
4549         dev->data->lro = 1;
4550
4551         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4552
4553         return 0;
4554 }
4555
4556 /*
4557  * Initializes Receive Unit.
4558  */
4559 int __attribute__((cold))
4560 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4561 {
4562         struct ixgbe_hw     *hw;
4563         struct ixgbe_rx_queue *rxq;
4564         uint64_t bus_addr;
4565         uint32_t rxctrl;
4566         uint32_t fctrl;
4567         uint32_t hlreg0;
4568         uint32_t maxfrs;
4569         uint32_t srrctl;
4570         uint32_t rdrxctl;
4571         uint32_t rxcsum;
4572         uint16_t buf_size;
4573         uint16_t i;
4574         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4575         int rc;
4576
4577         PMD_INIT_FUNC_TRACE();
4578         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4579
4580         /*
4581          * Make sure receives are disabled while setting
4582          * up the RX context (registers, descriptor rings, etc.).
4583          */
4584         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4585         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4586
4587         /* Enable receipt of broadcast frames */
4588         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4589         fctrl |= IXGBE_FCTRL_BAM;
4590         fctrl |= IXGBE_FCTRL_DPF;
4591         fctrl |= IXGBE_FCTRL_PMCF;
4592         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4593
4594         /*
4595          * Configure CRC stripping, if any.
4596          */
4597         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4598         if (rx_conf->hw_strip_crc)
4599                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4600         else
4601                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4602
4603         /*
4604          * Configure jumbo frame support, if any.
4605          */
4606         if (rx_conf->jumbo_frame == 1) {
4607                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4608                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4609                 maxfrs &= 0x0000FFFF;
4610                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4611                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4612         } else
4613                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4614
4615         /*
4616          * If loopback mode is configured for 82599, set LPBK bit.
4617          */
4618         if (hw->mac.type == ixgbe_mac_82599EB &&
4619                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4620                 hlreg0 |= IXGBE_HLREG0_LPBK;
4621         else
4622                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4623
4624         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4625
4626         /* Setup RX queues */
4627         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4628                 rxq = dev->data->rx_queues[i];
4629
4630                 /*
4631                  * Reset crc_len in case it was changed after queue setup by a
4632                  * call to configure.
4633                  */
4634                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4635
4636                 /* Setup the Base and Length of the Rx Descriptor Rings */
4637                 bus_addr = rxq->rx_ring_phys_addr;
4638                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4639                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4640                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4641                                 (uint32_t)(bus_addr >> 32));
4642                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4643                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4644                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4645                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4646
4647                 /* Configure the SRRCTL register */
4648 #ifdef RTE_HEADER_SPLIT_ENABLE
4649                 /*
4650                  * Configure Header Split
4651                  */
4652                 if (rx_conf->header_split) {
4653                         if (hw->mac.type == ixgbe_mac_82599EB) {
4654                                 /* Must setup the PSRTYPE register */
4655                                 uint32_t psrtype;
4656
4657                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4658                                         IXGBE_PSRTYPE_UDPHDR   |
4659                                         IXGBE_PSRTYPE_IPV4HDR  |
4660                                         IXGBE_PSRTYPE_IPV6HDR;
4661                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4662                         }
4663                         srrctl = ((rx_conf->split_hdr_size <<
4664                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4665                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4666                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4667                 } else
4668 #endif
4669                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4670
4671                 /* Set if packets are dropped when no descriptors available */
4672                 if (rxq->drop_en)
4673                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4674
4675                 /*
4676                  * Configure the RX buffer size in the BSIZEPACKET field of
4677                  * the SRRCTL register of the queue.
4678                  * The value is in 1 KB resolution. Valid values can be from
4679                  * 1 KB to 16 KB.
4680                  */
4681                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4682                         RTE_PKTMBUF_HEADROOM);
4683                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4684                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4685
4686                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4687
4688                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4689                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
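                /*
                 * For example, assuming the common 2 KB mbuf data room,
                 * BSIZEPKT is programmed as 2 (2048-byte Rx buffers);
                 * scattered Rx is then enabled below whenever the configured
                 * max Rx packet length plus the two VLAN tags does not fit
                 * in one buffer.
                 */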
4690
4691                 /* Add the dual VLAN tag length when checking whether scattered Rx is needed */
4692                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4693                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4694                         dev->data->scattered_rx = 1;
4695         }
4696
4697         if (rx_conf->enable_scatter)
4698                 dev->data->scattered_rx = 1;
4699
4700         /*
4701          * Device configured with multiple RX queues.
4702          */
4703         ixgbe_dev_mq_rx_configure(dev);
4704
4705         /*
4706          * Setup the Checksum Register.
4707          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4708          * Enable IP/L4 checksum computation by hardware if requested to do so.
4709          */
4710         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4711         rxcsum |= IXGBE_RXCSUM_PCSD;
4712         if (rx_conf->hw_ip_checksum)
4713                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4714         else
4715                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4716
4717         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4718
4719         if (hw->mac.type == ixgbe_mac_82599EB ||
4720             hw->mac.type == ixgbe_mac_X540) {
4721                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4722                 if (rx_conf->hw_strip_crc)
4723                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4724                 else
4725                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4726                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4727                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4728         }
4729
4730         rc = ixgbe_set_rsc(dev);
4731         if (rc)
4732                 return rc;
4733
4734         ixgbe_set_rx_function(dev);
4735
4736         return 0;
4737 }
4738
4739 /*
4740  * Initializes Transmit Unit.
4741  */
4742 void __attribute__((cold))
4743 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4744 {
4745         struct ixgbe_hw     *hw;
4746         struct ixgbe_tx_queue *txq;
4747         uint64_t bus_addr;
4748         uint32_t hlreg0;
4749         uint32_t txctrl;
4750         uint16_t i;
4751
4752         PMD_INIT_FUNC_TRACE();
4753         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4754
4755         /* Enable TX CRC (checksum offload requirement) and hw padding
4756          * (TSO requirement)
4757          */
4758         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4759         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4760         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4761
4762         /* Setup the Base and Length of the Tx Descriptor Rings */
4763         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4764                 txq = dev->data->tx_queues[i];
4765
4766                 bus_addr = txq->tx_ring_phys_addr;
4767                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4768                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4769                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4770                                 (uint32_t)(bus_addr >> 32));
4771                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4772                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4773                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4774                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4775                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4776
4777                 /*
4778                  * Disable Tx Head Writeback RO bit, since this hoses
4779                  * bookkeeping if things aren't delivered in order.
4780                  */
4781                 switch (hw->mac.type) {
4782                 case ixgbe_mac_82598EB:
4783                         txctrl = IXGBE_READ_REG(hw,
4784                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4785                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4786                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4787                                         txctrl);
4788                         break;
4789
4790                 case ixgbe_mac_82599EB:
4791                 case ixgbe_mac_X540:
4792                 case ixgbe_mac_X550:
4793                 case ixgbe_mac_X550EM_x:
4794                 case ixgbe_mac_X550EM_a:
4795                 default:
4796                         txctrl = IXGBE_READ_REG(hw,
4797                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4798                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4799                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4800                                         txctrl);
4801                         break;
4802                 }
4803         }
4804
4805         /* Device configured with multiple TX queues. */
4806         ixgbe_dev_mq_tx_configure(dev);
4807 }
4808
4809 /*
4810  * Set up link for 82599 loopback mode Tx->Rx.
4811  */
4812 static inline void __attribute__((cold))
4813 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4814 {
4815         PMD_INIT_FUNC_TRACE();
4816
4817         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4818                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4819                                 IXGBE_SUCCESS) {
4820                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4821                         /* ignore error */
4822                         return;
4823                 }
4824         }
4825
4826         /* Restart link */
4827         IXGBE_WRITE_REG(hw,
4828                         IXGBE_AUTOC,
4829                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4830         ixgbe_reset_pipeline_82599(hw);
4831
4832         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4833         msec_delay(50);
4834 }
4835
4836
4837 /*
4838  * Start Transmit and Receive Units.
4839  */
4840 int __attribute__((cold))
4841 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4842 {
4843         struct ixgbe_hw     *hw;
4844         struct ixgbe_tx_queue *txq;
4845         struct ixgbe_rx_queue *rxq;
4846         uint32_t txdctl;
4847         uint32_t dmatxctl;
4848         uint32_t rxctrl;
4849         uint16_t i;
4850         int ret = 0;
4851
4852         PMD_INIT_FUNC_TRACE();
4853         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4854
4855         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4856                 txq = dev->data->tx_queues[i];
4857                 /* Setup Transmit Threshold Registers */
4858                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4859                 txdctl |= txq->pthresh & 0x7F;
4860                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4861                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4862                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4863         }
4864
4865         if (hw->mac.type != ixgbe_mac_82598EB) {
4866                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4867                 dmatxctl |= IXGBE_DMATXCTL_TE;
4868                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4869         }
4870
4871         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4872                 txq = dev->data->tx_queues[i];
4873                 if (!txq->tx_deferred_start) {
4874                         ret = ixgbe_dev_tx_queue_start(dev, i);
4875                         if (ret < 0)
4876                                 return ret;
4877                 }
4878         }
4879
4880         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4881                 rxq = dev->data->rx_queues[i];
4882                 if (!rxq->rx_deferred_start) {
4883                         ret = ixgbe_dev_rx_queue_start(dev, i);
4884                         if (ret < 0)
4885                                 return ret;
4886                 }
4887         }
4888
4889         /* Enable Receive engine */
4890         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4891         if (hw->mac.type == ixgbe_mac_82598EB)
4892                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4893         rxctrl |= IXGBE_RXCTRL_RXEN;
4894         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4895
4896         /* If loopback mode is enabled for 82599, set up the link accordingly */
4897         if (hw->mac.type == ixgbe_mac_82599EB &&
4898                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4899                 ixgbe_setup_loopback_link_82599(hw);
4900
4901         return 0;
4902 }
4903
4904 /*
4905  * Start Receive Units for specified queue.
4906  */
4907 int __attribute__((cold))
4908 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4909 {
4910         struct ixgbe_hw     *hw;
4911         struct ixgbe_rx_queue *rxq;
4912         uint32_t rxdctl;
4913         int poll_ms;
4914
4915         PMD_INIT_FUNC_TRACE();
4916         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4917
4918         if (rx_queue_id < dev->data->nb_rx_queues) {
4919                 rxq = dev->data->rx_queues[rx_queue_id];
4920
4921                 /* Allocate buffers for descriptor rings */
4922                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4923                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4924                                      rx_queue_id);
4925                         return -1;
4926                 }
4927                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4928                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4929                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4930
4931                 /* Wait until RX Enable ready */
4932                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4933                 do {
4934                         rte_delay_ms(1);
4935                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4936                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4937                 if (!poll_ms)
4938                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4939                                      rx_queue_id);
4940                 rte_wmb();
4941                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4942                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4943                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
4944         } else
4945                 return -1;
4946
4947         return 0;
4948 }
4949
4950 /*
4951  * Stop Receive Units for specified queue.
4952  */
4953 int __attribute__((cold))
4954 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4955 {
4956         struct ixgbe_hw     *hw;
4957         struct ixgbe_adapter *adapter =
4958                 (struct ixgbe_adapter *)dev->data->dev_private;
4959         struct ixgbe_rx_queue *rxq;
4960         uint32_t rxdctl;
4961         int poll_ms;
4962
4963         PMD_INIT_FUNC_TRACE();
4964         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4965
4966         if (rx_queue_id < dev->data->nb_rx_queues) {
4967                 rxq = dev->data->rx_queues[rx_queue_id];
4968
4969                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4970                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
4971                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4972
4973                 /* Wait until RX Enable bit clear */
4974                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4975                 do {
4976                         rte_delay_ms(1);
4977                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4978                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
4979                 if (!poll_ms)
4980                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
4981                                      rx_queue_id);
4982
4983                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
4984
4985                 ixgbe_rx_queue_release_mbufs(rxq);
4986                 ixgbe_reset_rx_queue(adapter, rxq);
4987                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
4988         } else
4989                 return -1;
4990
4991         return 0;
4992 }
4993
4994
4995 /*
4996  * Start Transmit Units for specified queue.
4997  */
4998 int __attribute__((cold))
4999 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5000 {
5001         struct ixgbe_hw     *hw;
5002         struct ixgbe_tx_queue *txq;
5003         uint32_t txdctl;
5004         int poll_ms;
5005
5006         PMD_INIT_FUNC_TRACE();
5007         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5008
5009         if (tx_queue_id < dev->data->nb_tx_queues) {
5010                 txq = dev->data->tx_queues[tx_queue_id];
5011                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5012                 txdctl |= IXGBE_TXDCTL_ENABLE;
5013                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5014
5015                 /* Wait until TX Enable ready */
5016                 if (hw->mac.type == ixgbe_mac_82599EB) {
5017                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5018                         do {
5019                                 rte_delay_ms(1);
5020                                 txdctl = IXGBE_READ_REG(hw,
5021                                         IXGBE_TXDCTL(txq->reg_idx));
5022                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5023                         if (!poll_ms)
5024                                 PMD_INIT_LOG(ERR, "Could not enable "
5025                                              "Tx Queue %d", tx_queue_id);
5026                 }
		rte_wmb();
		IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
		dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
	} else
		return -1;

	return 0;
}

/*
 * Stop Transmit Units for specified queue.
 */
int __attribute__((cold))
ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_tx_queue *txq;
	uint32_t txdctl;
	uint32_t txtdh, txtdt;
	int poll_ms;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	if (tx_queue_id >= dev->data->nb_tx_queues)
		return -1;

	txq = dev->data->tx_queues[tx_queue_id];

	/* Wait until TX queue is empty */
	if (hw->mac.type == ixgbe_mac_82599EB) {
		poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
		do {
			rte_delay_us(RTE_IXGBE_WAIT_100_US);
			txtdh = IXGBE_READ_REG(hw,
					       IXGBE_TDH(txq->reg_idx));
			txtdt = IXGBE_READ_REG(hw,
					       IXGBE_TDT(txq->reg_idx));
		} while (--poll_ms && (txtdh != txtdt));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Tx Queue %d is not empty when stopping.",
				     tx_queue_id);
	}

	txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
	txdctl &= ~IXGBE_TXDCTL_ENABLE;
	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);

	/* Wait until TX Enable bit clear */
	if (hw->mac.type == ixgbe_mac_82599EB) {
		poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
		do {
			rte_delay_ms(1);
			txdctl = IXGBE_READ_REG(hw,
						IXGBE_TXDCTL(txq->reg_idx));
		} while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
				     tx_queue_id);
	}

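	/*
	 * With the queue disabled, return any mbufs still attached to the
	 * descriptor ring to their mempool and reset the software ring state.
	 */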
	if (txq->ops != NULL) {
		txq->ops->release_mbufs(txq);
		txq->ops->reset(txq);
	}
	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;

	return 0;
}

void
ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
	struct rte_eth_rxq_info *qinfo)
{
	struct ixgbe_rx_queue *rxq;

	rxq = dev->data->rx_queues[queue_id];

	qinfo->mp = rxq->mb_pool;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = rxq->nb_rx_desc;

	qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
	qinfo->conf.rx_drop_en = rxq->drop_en;
	qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
}

void
ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
	struct rte_eth_txq_info *qinfo)
{
	struct ixgbe_tx_queue *txq;

	txq = dev->data->tx_queues[queue_id];

	qinfo->nb_desc = txq->nb_tx_desc;

	qinfo->conf.tx_thresh.pthresh = txq->pthresh;
	qinfo->conf.tx_thresh.hthresh = txq->hthresh;
	qinfo->conf.tx_thresh.wthresh = txq->wthresh;

	qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
	qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
	qinfo->conf.txq_flags = txq->txq_flags;
	qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
}

/*
 * [VF] Initializes Receive Unit.
 */
int __attribute__((cold))
ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_rx_queue *rxq;
	uint64_t bus_addr;
	uint32_t srrctl, psrtype = 0;
	uint16_t buf_size;
	uint16_t i;
	int ret;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
		PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
			"it must be a power of 2");
		return -1;
	}

	if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
		PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
			"it must be less than or equal to %d",
			hw->mac.max_rx_queues);
		return -1;
	}
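	/*
	 * The power-of-2 requirement appears to stem from VF RSS: the
	 * PSRTYPE.RQPL field programmed at the end of this function encodes
	 * the per-pool queue count as a power of two.
	 */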

	/*
	 * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
	 * disables VF packet reception if the PF MTU is > 1500. This works
	 * around an 82599 limitation that forces the PF and all VFs to share
	 * the same MTU. The PF driver only re-enables VF packet reception
	 * once the VF driver issues an IXGBE_VF_SET_LPE request. In the
	 * meantime the VF device cannot be used, even though the VF driver
	 * and the guest VM network stack are ready to accept packets up to
	 * the PF MTU. As a workaround to this PF behaviour, call
	 * ixgbevf_rlpml_set_vf unconditionally, even when jumbo frames are
	 * not used, so that VF packet reception works in all cases.
	 */
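	/* This call issues the IXGBE_VF_SET_LPE request described above. */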
	ixgbevf_rlpml_set_vf(hw,
		(uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);

	/* Setup RX queues */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		/* Allocate buffers for descriptor rings */
		ret = ixgbe_alloc_rx_queue_mbufs(rxq);
		if (ret)
			return ret;

		/* Setup the Base and Length of the Rx Descriptor Rings */
		bus_addr = rxq->rx_ring_phys_addr;

		IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
				(uint32_t)(bus_addr >> 32));
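		/*
		 * Example: a descriptor ring at bus address 0x000000012345a000
		 * is programmed as RDBAL = 0x2345a000 and RDBAH = 0x00000001.
		 */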
		IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
				rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
		IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);

		/* Configure the SRRCTL register */
#ifdef RTE_HEADER_SPLIT_ENABLE
		/*
		 * Configure Header Split
		 */
		if (dev->data->dev_conf.rxmode.header_split) {
			srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
				IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
				IXGBE_SRRCTL_BSIZEHDR_MASK);
			srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
		} else
#endif
			srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;

		/* Set if packets are dropped when no descriptors available */
		if (rxq->drop_en)
			srrctl |= IXGBE_SRRCTL_DROP_EN;

		/*
		 * Configure the RX buffer size in the BSIZEPACKET field of
		 * the SRRCTL register of the queue.
		 * The value is in 1 KB resolution. Valid values can be from
		 * 1 KB to 16 KB.
		 */
		buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
			RTE_PKTMBUF_HEADROOM);
		srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
			   IXGBE_SRRCTL_BSIZEPKT_MASK);
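		/*
		 * Example: with the default RTE_MBUF_DEFAULT_BUF_SIZE pool
		 * (2176 byte data room, 128 byte headroom) buf_size is 2048,
		 * so 2048 >> 10 = 2 is written to BSIZEPACKET, i.e. a 2 KB
		 * hardware buffer.
		 */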

		/*
		 * VF modification to write virtual function SRRCTL register
		 */
		IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);

		buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
				       IXGBE_SRRCTL_BSIZEPKT_SHIFT);

		if (dev->data->dev_conf.rxmode.enable_scatter ||
		    /* add dual VLAN (QinQ) tag length to the frame size */
		    (dev->data->dev_conf.rxmode.max_rx_pkt_len +
				2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
			if (!dev->data->scattered_rx)
				PMD_INIT_LOG(DEBUG, "forcing scatter mode");
			dev->data->scattered_rx = 1;
		}
	}

#ifdef RTE_HEADER_SPLIT_ENABLE
	if (dev->data->dev_conf.rxmode.header_split)
		/* Must setup the PSRTYPE register */
		psrtype = IXGBE_PSRTYPE_TCPHDR |
			IXGBE_PSRTYPE_UDPHDR   |
			IXGBE_PSRTYPE_IPV4HDR  |
			IXGBE_PSRTYPE_IPV6HDR;
#endif

	/* Set RQPL for VF RSS according to max Rx queue */
	psrtype |= (dev->data->nb_rx_queues >> 1) <<
		IXGBE_PSRTYPE_RQPL_SHIFT;
	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);

	ixgbe_set_rx_function(dev);

	return 0;
}

/*
 * [VF] Initializes Transmit Unit.
 */
void __attribute__((cold))
ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_tx_queue *txq;
	uint64_t bus_addr;
	uint32_t txctrl;
	uint16_t i;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	/* Setup the Base and Length of the Tx Descriptor Rings */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		bus_addr = txq->tx_ring_phys_addr;
		IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
				(uint32_t)(bus_addr & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
				(uint32_t)(bus_addr >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
				txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
		/* Setup the HW Tx Head and TX Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);

		/*
		 * Disable Tx Head Writeback RO bit, since this hoses
		 * bookkeeping if things aren't delivered in order.
		 */
		txctrl = IXGBE_READ_REG(hw,
				IXGBE_VFDCA_TXCTRL(i));
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
				txctrl);
	}
}

/*
 * [VF] Start Transmit and Receive Units.
 */
void __attribute__((cold))
ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
{
	struct ixgbe_hw     *hw;
	struct ixgbe_tx_queue *txq;
	struct ixgbe_rx_queue *rxq;
	uint32_t txdctl;
	uint32_t rxdctl;
	uint16_t i;
	int poll_ms;

	PMD_INIT_FUNC_TRACE();
	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		/* Setup Transmit Threshold Registers */
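		/*
		 * PTHRESH, HTHRESH and WTHRESH occupy bits 6:0, 14:8 and
		 * 22:16 of TXDCTL respectively, hence the masks and shifts
		 * below.
		 */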
		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		txdctl |= txq->pthresh & 0x7F;
		txdctl |= ((txq->hthresh & 0x7F) << 8);
		txdctl |= ((txq->wthresh & 0x7F) << 16);
		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);

		poll_ms = 10;
		/* Wait until TX Enable ready */
		do {
			rte_delay_ms(1);
			txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
		} while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
	}
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		rxq = dev->data->rx_queues[i];

		rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);

		/* Wait until RX Enable ready */
		poll_ms = 10;
		do {
			rte_delay_ms(1);
			rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
		} while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
		if (!poll_ms)
			PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
		rte_wmb();
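		/*
		 * The tail is written as nb_rx_desc - 1, leaving one
		 * descriptor unused so the tail never wraps onto the head
		 * of a freshly filled ring.
		 */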
		IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
	}
}

/* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
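/*
 * When CONFIG_RTE_IXGBE_INC_VECTOR is enabled, these weak definitions are
 * overridden at link time by the architecture-specific vector implementations.
 */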
int __attribute__((weak))
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
{
	return -1;
}

uint16_t __attribute__((weak))
ixgbe_recv_pkts_vec(
	void __rte_unused *rx_queue,
	struct rte_mbuf __rte_unused **rx_pkts,
	uint16_t __rte_unused nb_pkts)
{
	return 0;
}

uint16_t __attribute__((weak))
ixgbe_recv_scattered_pkts_vec(
	void __rte_unused *rx_queue,
	struct rte_mbuf __rte_unused **rx_pkts,
	uint16_t __rte_unused nb_pkts)
{
	return 0;
}

int __attribute__((weak))
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
{
	return -1;
}