net/ixgbe: fix all queues drop setting of DCB
drivers/net/ixgbe/ixgbe_rxtx.c (dpdk.git)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
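/*
 * Illustrative sketch, not part of the driver: because the NOTSUP mask above
 * is simply the complement of the supported bits within PKT_TX_OFFLOAD_MASK,
 * a single AND is enough to detect an mbuf requesting an unsupported offload
 * (this is exactly the check ixgbe_prep_pkts() performs further down).
 * The helper name is made up for the example.
 */
static inline int
example_tx_offload_supported(const struct rte_mbuf *m)
{
        /* zero means every requested offload is within IXGBE_TX_OFFLOAD_MASK */
        return (m->ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) == 0;
}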
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i, nb_free = 0;
130         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
131
132         /* check DD bit on threshold descriptor */
133         status = txq->tx_ring[txq->tx_next_dd].wb.status;
134         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
135                 return 0;
136
137         /*
138          * first buffer to free from S/W ring is at index
139          * tx_next_dd - (tx_rs_thresh-1)
140          */
141         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142
143         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
144                 /* free buffers one at a time */
145                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
146                 txep->mbuf = NULL;
147
148                 if (unlikely(m == NULL))
149                         continue;
150
151                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
152                     (nb_free > 0 && m->pool != free[0]->pool)) {
153                         rte_mempool_put_bulk(free[0]->pool,
154                                              (void **)free, nb_free);
155                         nb_free = 0;
156                 }
157
158                 free[nb_free++] = m;
159         }
160
161         if (nb_free > 0)
162                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
163
164         /* buffers were freed, update counters */
165         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
166         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
167         if (txq->tx_next_dd >= txq->nb_tx_desc)
168                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
169
170         return txq->tx_rs_thresh;
171 }
172
173 /* Populate 4 descriptors with data from 4 mbufs */
174 static inline void
175 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
176 {
177         uint64_t buf_dma_addr;
178         uint32_t pkt_len;
179         int i;
180
181         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
182                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
183                 pkt_len = (*pkts)->data_len;
184
185                 /* write data to descriptor */
186                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187
188                 txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190
191                 txdp->read.olinfo_status =
192                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
193
194                 rte_prefetch0(&(*pkts)->pool);
195         }
196 }
197
198 /* Populate 1 descriptor with data from 1 mbuf */
199 static inline void
200 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
201 {
202         uint64_t buf_dma_addr;
203         uint32_t pkt_len;
204
205         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
206         pkt_len = (*pkts)->data_len;
207
208         /* write data to descriptor */
209         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
210         txdp->read.cmd_type_len =
211                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
212         txdp->read.olinfo_status =
213                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
214         rte_prefetch0(&(*pkts)->pool);
215 }
216
217 /*
218  * Fill H/W descriptor ring with mbuf data.
219  * Copy mbuf pointers to the S/W ring.
220  */
221 static inline void
222 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
223                       uint16_t nb_pkts)
224 {
225         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
226         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
227         const int N_PER_LOOP = 4;
228         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
229         int mainpart, leftover;
230         int i, j;
231
232         /*
233          * Process most of the packets in chunks of N pkts.  Any
234          * leftover packets will get processed one at a time.
235          */
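        /*
         * Worked example (editor's note, illustrative values): with
         * N_PER_LOOP = 4 and nb_pkts = 13, mainpart = 13 & ~3 = 12 and
         * leftover = 13 & 3 = 1, so three tx4() calls cover packets 0-11
         * and one tx1() call handles packet 12.
         */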
236         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
237         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
238         for (i = 0; i < mainpart; i += N_PER_LOOP) {
239                 /* Copy N mbuf pointers to the S/W ring */
240                 for (j = 0; j < N_PER_LOOP; ++j) {
241                         (txep + i + j)->mbuf = *(pkts + i + j);
242                 }
243                 tx4(txdp + i, pkts + i);
244         }
245
246         if (unlikely(leftover > 0)) {
247                 for (i = 0; i < leftover; ++i) {
248                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
249                         tx1(txdp + mainpart + i, pkts + mainpart + i);
250                 }
251         }
252 }
253
254 static inline uint16_t
255 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
256              uint16_t nb_pkts)
257 {
258         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
259         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
260         uint16_t n = 0;
261
262         /*
263          * Begin scanning the H/W ring for done descriptors when the
264          * number of available descriptors drops below tx_free_thresh.  For
265          * each done descriptor, free the associated buffer.
266          */
267         if (txq->nb_tx_free < txq->tx_free_thresh)
268                 ixgbe_tx_free_bufs(txq);
269
270         /* Only use descriptors that are available */
271         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
272         if (unlikely(nb_pkts == 0))
273                 return 0;
274
275         /* Use exactly nb_pkts descriptors */
276         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
277
278         /*
279          * At this point, we know there are enough descriptors in the
280          * ring to transmit all the packets.  This assumes that each
281          * mbuf contains a single segment, and that no new offloads
282          * are expected, which would require a new context descriptor.
283          */
284
285         /*
286          * See if we're going to wrap-around. If so, handle the top
287          * of the descriptor ring first, then do the bottom.  If not,
288          * the processing looks just like the "bottom" part anyway...
289          */
290         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
291                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
292                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
293
294                 /*
295                  * We know that the last descriptor in the ring will need to
296                  * have its RS bit set because tx_rs_thresh has to be
297                  * a divisor of the ring size
298                  */
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302
303                 txq->tx_tail = 0;
304         }
305
306         /* Fill H/W descriptor ring with mbuf data */
307         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
308         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
309
310         /*
311          * Determine if RS bit should be set
312          * This is what we actually want:
313          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
314          * but instead of subtracting 1 and doing >=, we can just do
315          * greater than without subtracting.
316          */
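        /*
         * Worked example (editor's note, illustrative values): with
         * tx_rs_thresh = 32 and tx_next_rs currently 31, filling up to
         * tx_tail = 40 makes tx_tail > 31, so the RS bit is set on
         * descriptor 31 and tx_next_rs advances to 63.
         */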
317         if (txq->tx_tail > txq->tx_next_rs) {
318                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
319                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
320                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
321                                                 txq->tx_rs_thresh);
322                 if (txq->tx_next_rs >= txq->nb_tx_desc)
323                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
324         }
325
326         /*
327          * Check for wrap-around. This would only happen if we used
328          * up to the last descriptor in the ring, no more, no less.
329          */
330         if (txq->tx_tail >= txq->nb_tx_desc)
331                 txq->tx_tail = 0;
332
333         /* update tail pointer */
334         rte_wmb();
335         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
336
337         return nb_pkts;
338 }
339
340 uint16_t
341 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
342                        uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345
346         /* Transmit directly when the burst fits within TX_MAX_BURST pkts */
347         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
348                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
349
350         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
351         nb_tx = 0;
352         while (nb_pkts) {
353                 uint16_t ret, n;
354
355                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
356                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
357                 nb_tx = (uint16_t)(nb_tx + ret);
358                 nb_pkts = (uint16_t)(nb_pkts - ret);
359                 if (ret < n)
360                         break;
361         }
362
363         return nb_tx;
364 }
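/*
 * Illustrative sketch, not part of the driver: an application-side retry loop
 * around rte_eth_tx_burst(), which dispatches to ixgbe_xmit_pkts_simple()
 * when this PMD selects the simple TX path. The helper name and the
 * port/queue ids are assumptions made for the example.
 */
static inline uint16_t
example_tx_drain(uint8_t port_id, uint16_t queue_id,
                 struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        uint16_t sent = 0;

        while (sent < nb_pkts) {
                uint16_t n = rte_eth_tx_burst(port_id, queue_id,
                                              &pkts[sent],
                                              (uint16_t)(nb_pkts - sent));

                /* nothing was accepted: the ring is full, stop retrying */
                if (n == 0)
                        break;
                sent = (uint16_t)(sent + n);
        }
        return sent;
}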
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & PKT_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation is required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 case PKT_TX_SCTP_CKSUM:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
434                         tx_offload_mask.l2_len |= ~0;
435                         tx_offload_mask.l3_len |= ~0;
436                         break;
437                 default:
438                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
439                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
440                         break;
441                 }
442         }
443
444         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
445                 tx_offload_mask.outer_l2_len |= ~0;
446                 tx_offload_mask.outer_l3_len |= ~0;
447                 tx_offload_mask.l2_len |= ~0;
448                 seqnum_seed |= tx_offload.outer_l3_len
449                                << IXGBE_ADVTXD_OUTER_IPLEN;
450                 seqnum_seed |= tx_offload.l2_len
451                                << IXGBE_ADVTXD_TUNNEL_LEN;
452         }
453
454         txq->ctx_cache[ctx_idx].flags = ol_flags;
455         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
456                 tx_offload_mask.data[0] & tx_offload.data[0];
457         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
458                 tx_offload_mask.data[1] & tx_offload.data[1];
459         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
460
461         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
462         vlan_macip_lens = tx_offload.l3_len;
463         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
464                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
465                                     IXGBE_ADVTXD_MACLEN_SHIFT);
466         else
467                 vlan_macip_lens |= (tx_offload.l2_len <<
468                                     IXGBE_ADVTXD_MACLEN_SHIFT);
469         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
470         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
471         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
472         ctx_txd->seqnum_seed     = seqnum_seed;
473 }
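/*
 * Illustrative sketch, not part of the driver: the mbuf fields an application
 * would set to request TSO; these are the values that end up in the
 * tx_offload union consumed by ixgbe_set_xmit_ctx() above. Header sizes and
 * the MSS below are example values, not requirements.
 */
static inline void
example_request_tso(struct rte_mbuf *m)
{
        m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
        m->l2_len = sizeof(struct ether_hdr);  /* 14-byte Ethernet header */
        m->l3_len = sizeof(struct ipv4_hdr);   /* 20-byte IPv4 header */
        m->l4_len = sizeof(struct tcp_hdr);    /* 20-byte TCP header */
        m->tso_segsz = 1460;                   /* MSS of each emitted segment */
}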
474
475 /*
476  * Check which hardware context can be used. Use the existing match
477  * or create a new context descriptor.
478  */
479 static inline uint32_t
480 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
481                    union ixgbe_tx_offload tx_offload)
482 {
483         /* Check whether the flags match the currently used context */
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* Otherwise, check whether they match the other cached context */
494         txq->ctx_curr ^= 1;
495         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
496                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
497                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
498                      & tx_offload.data[0])) &&
499                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
500                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
501                      & tx_offload.data[1]))))
502                 return txq->ctx_curr;
503
504         /* Neither cached context matches: a new context descriptor is needed */
505         return IXGBE_CTX_NUM;
506 }
507
508 static inline uint32_t
509 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
510 {
511         uint32_t tmp = 0;
512
513         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
514                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
515         if (ol_flags & PKT_TX_IP_CKSUM)
516                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
517         if (ol_flags & PKT_TX_TCP_SEG)
518                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
519         return tmp;
520 }
521
522 static inline uint32_t
523 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
524 {
525         uint32_t cmdtype = 0;
526
527         if (ol_flags & PKT_TX_VLAN_PKT)
528                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
529         if (ol_flags & PKT_TX_TCP_SEG)
530                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
531         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
532                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
533         if (ol_flags & PKT_TX_MACSEC)
534                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
535         return cmdtype;
536 }
537
538 /* Default RS bit threshold values */
539 #ifndef DEFAULT_TX_RS_THRESH
540 #define DEFAULT_TX_RS_THRESH   32
541 #endif
542 #ifndef DEFAULT_TX_FREE_THRESH
543 #define DEFAULT_TX_FREE_THRESH 32
544 #endif
545
546 /* Reset transmit descriptors after they have been used */
547 static inline int
548 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
549 {
550         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
551         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
552         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
553         uint16_t nb_tx_desc = txq->nb_tx_desc;
554         uint16_t desc_to_clean_to;
555         uint16_t nb_tx_to_clean;
556         uint32_t status;
557
558         /* Determine the last descriptor needing to be cleaned */
559         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
560         if (desc_to_clean_to >= nb_tx_desc)
561                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
562
563         /* Check to make sure the last descriptor to clean is done */
564         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
565         status = txr[desc_to_clean_to].wb.status;
566         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
567                 PMD_TX_FREE_LOG(DEBUG,
568                                 "TX descriptor %4u is not done "
569                                 "(port=%d queue=%d)",
570                                 desc_to_clean_to,
571                                 txq->port_id, txq->queue_id);
572                 /* Failed to clean any descriptors, better luck next time */
573                 return -(1);
574         }
575
576         /* Figure out how many descriptors will be cleaned */
577         if (last_desc_cleaned > desc_to_clean_to)
578                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
579                                                         desc_to_clean_to);
580         else
581                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
582                                                 last_desc_cleaned);
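        /*
         * Worked example (editor's note, illustrative values): with
         * nb_tx_desc = 1024, tx_rs_thresh = 32 and last_desc_cleaned = 1020,
         * desc_to_clean_to wraps to 28; assuming the packet boundaries line
         * up so that sw_ring[28].last_id is 28, the branch above yields
         * nb_tx_to_clean = (1024 - 1020) + 28 = 32.
         */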
583
584         PMD_TX_FREE_LOG(DEBUG,
585                         "Cleaning %4u TX descriptors: %4u to %4u "
586                         "(port=%d queue=%d)",
587                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
588                         txq->port_id, txq->queue_id);
589
590         /*
591          * The last descriptor to clean is done, so that means all the
592          * descriptors from the last descriptor that was cleaned
593          * up to the last descriptor with the RS bit set
594          * are done. Only reset the threshold descriptor.
595          */
596         txr[desc_to_clean_to].wb.status = 0;
597
598         /* Update the txq to reflect the last descriptor that was cleaned */
599         txq->last_desc_cleaned = desc_to_clean_to;
600         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
601
602         /* No Error */
603         return 0;
604 }
605
606 uint16_t
607 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
608                 uint16_t nb_pkts)
609 {
610         struct ixgbe_tx_queue *txq;
611         struct ixgbe_tx_entry *sw_ring;
612         struct ixgbe_tx_entry *txe, *txn;
613         volatile union ixgbe_adv_tx_desc *txr;
614         volatile union ixgbe_adv_tx_desc *txd, *txp;
615         struct rte_mbuf     *tx_pkt;
616         struct rte_mbuf     *m_seg;
617         uint64_t buf_dma_addr;
618         uint32_t olinfo_status;
619         uint32_t cmd_type_len;
620         uint32_t pkt_len;
621         uint16_t slen;
622         uint64_t ol_flags;
623         uint16_t tx_id;
624         uint16_t tx_last;
625         uint16_t nb_tx;
626         uint16_t nb_used;
627         uint64_t tx_ol_req;
628         uint32_t ctx = 0;
629         uint32_t new_ctx;
630         union ixgbe_tx_offload tx_offload;
631
632         tx_offload.data[0] = 0;
633         tx_offload.data[1] = 0;
634         txq = tx_queue;
635         sw_ring = txq->sw_ring;
636         txr     = txq->tx_ring;
637         tx_id   = txq->tx_tail;
638         txe = &sw_ring[tx_id];
639         txp = NULL;
640
641         /* Determine if the descriptor ring needs to be cleaned. */
642         if (txq->nb_tx_free < txq->tx_free_thresh)
643                 ixgbe_xmit_cleanup(txq);
644
645         rte_prefetch0(&txe->mbuf->pool);
646
647         /* TX loop */
648         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
649                 new_ctx = 0;
650                 tx_pkt = *tx_pkts++;
651                 pkt_len = tx_pkt->pkt_len;
652
653                 /*
654                  * Determine how many (if any) context descriptors
655                  * are needed for offload functionality.
656                  */
657                 ol_flags = tx_pkt->ol_flags;
658
659                 /* If hardware offload required */
660                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
661                 if (tx_ol_req) {
662                         tx_offload.l2_len = tx_pkt->l2_len;
663                         tx_offload.l3_len = tx_pkt->l3_len;
664                         tx_offload.l4_len = tx_pkt->l4_len;
665                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
666                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
667                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
668                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
669
670                         /* Decide whether a new context must be built or an existing one reused. */
671                         ctx = what_advctx_update(txq, tx_ol_req,
672                                 tx_offload);
673                         /* Only allocate a context descriptor if required */
674                         new_ctx = (ctx == IXGBE_CTX_NUM);
675                         ctx = txq->ctx_curr;
676                 }
677
678                 /*
679                  * Keep track of how many descriptors are used in this loop.
680                  * This will always be the number of segments plus the number of
681                  * context descriptors required to transmit the packet.
682                  */
683                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
684
685                 if (txp != NULL &&
686                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
687                         /* set RS on the previous packet in the burst */
688                         txp->read.cmd_type_len |=
689                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
690
691                 /*
692                  * The number of descriptors that must be allocated for a
693                  * packet is the number of segments of that packet, plus 1
694                  * Context Descriptor for the hardware offload, if any.
695                  * Determine the last TX descriptor to allocate in the TX ring
696                  * for the packet, starting from the current position (tx_id)
697                  * in the ring.
698                  */
699                 tx_last = (uint16_t) (tx_id + nb_used - 1);
700
701                 /* Circular ring */
702                 if (tx_last >= txq->nb_tx_desc)
703                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
704
705                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
706                            " tx_first=%u tx_last=%u",
707                            (unsigned) txq->port_id,
708                            (unsigned) txq->queue_id,
709                            (unsigned) pkt_len,
710                            (unsigned) tx_id,
711                            (unsigned) tx_last);
712
713                 /*
714                  * Make sure there are enough TX descriptors available to
715                  * transmit the entire packet.
716                  * nb_used better be less than or equal to txq->tx_rs_thresh
717                  */
718                 if (nb_used > txq->nb_tx_free) {
719                         PMD_TX_FREE_LOG(DEBUG,
720                                         "Not enough free TX descriptors "
721                                         "nb_used=%4u nb_free=%4u "
722                                         "(port=%d queue=%d)",
723                                         nb_used, txq->nb_tx_free,
724                                         txq->port_id, txq->queue_id);
725
726                         if (ixgbe_xmit_cleanup(txq) != 0) {
727                                 /* Could not clean any descriptors */
728                                 if (nb_tx == 0)
729                                         return 0;
730                                 goto end_of_tx;
731                         }
732
733                         /* nb_used better be <= txq->tx_rs_thresh */
734                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
735                                 PMD_TX_FREE_LOG(DEBUG,
736                                         "The number of descriptors needed to "
737                                         "transmit the packet exceeds the "
738                                         "RS bit threshold. This will impact "
739                                         "performance. "
740                                         "nb_used=%4u nb_free=%4u "
741                                         "tx_rs_thresh=%4u. "
742                                         "(port=%d queue=%d)",
743                                         nb_used, txq->nb_tx_free,
744                                         txq->tx_rs_thresh,
745                                         txq->port_id, txq->queue_id);
746                                 /*
747                                  * Loop here until there are enough TX
748                                  * descriptors or until the ring cannot be
749                                  * cleaned.
750                                  */
751                                 while (nb_used > txq->nb_tx_free) {
752                                         if (ixgbe_xmit_cleanup(txq) != 0) {
753                                                 /*
754                                                  * Could not clean any
755                                                  * descriptors
756                                                  */
757                                                 if (nb_tx == 0)
758                                                         return 0;
759                                                 goto end_of_tx;
760                                         }
761                                 }
762                         }
763                 }
764
765                 /*
766                  * By now there are enough free TX descriptors to transmit
767                  * the packet.
768                  */
769
770                 /*
771                  * Set common flags of all TX Data Descriptors.
772                  *
773                  * The following bits must be set in all Data Descriptors:
774                  *   - IXGBE_ADVTXD_DTYP_DATA
775                  *   - IXGBE_ADVTXD_DCMD_DEXT
776                  *
777                  * The following bits must be set in the first Data Descriptor
778                  * and are ignored in the other ones:
779                  *   - IXGBE_ADVTXD_DCMD_IFCS
780                  *   - IXGBE_ADVTXD_MAC_1588
781                  *   - IXGBE_ADVTXD_DCMD_VLE
782                  *
783                  * The following bits must only be set in the last Data
784                  * Descriptor:
785                  *   - IXGBE_TXD_CMD_EOP
786                  *
787                  * The following bits can be set in any Data Descriptor, but
788                  * are only set in the last Data Descriptor:
789                  *   - IXGBE_TXD_CMD_RS
790                  */
791                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
792                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
793
794 #ifdef RTE_LIBRTE_IEEE1588
795                 if (ol_flags & PKT_TX_IEEE1588_TMST)
796                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
797 #endif
798
799                 olinfo_status = 0;
800                 if (tx_ol_req) {
801
802                         if (ol_flags & PKT_TX_TCP_SEG) {
803                                 /* when TSO is on, the paylen in the descriptor is
804                                  * not the packet len but the TCP payload len */
805                                 pkt_len -= (tx_offload.l2_len +
806                                         tx_offload.l3_len + tx_offload.l4_len);
807                         }
808
809                         /*
810                          * Setup the TX Advanced Context Descriptor if required
811                          */
812                         if (new_ctx) {
813                                 volatile struct ixgbe_adv_tx_context_desc *
814                                     ctx_txd;
815
816                                 ctx_txd = (volatile struct
817                                     ixgbe_adv_tx_context_desc *)
818                                     &txr[tx_id];
819
820                                 txn = &sw_ring[txe->next_id];
821                                 rte_prefetch0(&txn->mbuf->pool);
822
823                                 if (txe->mbuf != NULL) {
824                                         rte_pktmbuf_free_seg(txe->mbuf);
825                                         txe->mbuf = NULL;
826                                 }
827
828                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
829                                         tx_offload);
830
831                                 txe->last_id = tx_last;
832                                 tx_id = txe->next_id;
833                                 txe = txn;
834                         }
835
836                         /*
837                          * Set up the TX Advanced Data Descriptor.
838                          * This path is taken whether a new context
839                          * descriptor was built or an existing one is reused.
840                          */
841                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
842                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
843                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
844                 }
845
846                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
847
848                 m_seg = tx_pkt;
849                 do {
850                         txd = &txr[tx_id];
851                         txn = &sw_ring[txe->next_id];
852                         rte_prefetch0(&txn->mbuf->pool);
853
854                         if (txe->mbuf != NULL)
855                                 rte_pktmbuf_free_seg(txe->mbuf);
856                         txe->mbuf = m_seg;
857
858                         /*
859                          * Set up Transmit Data Descriptor.
860                          */
861                         slen = m_seg->data_len;
862                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
863                         txd->read.buffer_addr =
864                                 rte_cpu_to_le_64(buf_dma_addr);
865                         txd->read.cmd_type_len =
866                                 rte_cpu_to_le_32(cmd_type_len | slen);
867                         txd->read.olinfo_status =
868                                 rte_cpu_to_le_32(olinfo_status);
869                         txe->last_id = tx_last;
870                         tx_id = txe->next_id;
871                         txe = txn;
872                         m_seg = m_seg->next;
873                 } while (m_seg != NULL);
874
875                 /*
876                  * The last packet data descriptor needs End Of Packet (EOP)
877                  */
878                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
879                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
880                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
881
882                 /* Set RS bit only on threshold packets' last descriptor */
883                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
884                         PMD_TX_FREE_LOG(DEBUG,
885                                         "Setting RS bit on TXD id="
886                                         "%4u (port=%d queue=%d)",
887                                         tx_last, txq->port_id, txq->queue_id);
888
889                         cmd_type_len |= IXGBE_TXD_CMD_RS;
890
891                         /* Update txq RS bit counters */
892                         txq->nb_tx_used = 0;
893                         txp = NULL;
894                 } else
895                         txp = txd;
896
897                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
898         }
899
900 end_of_tx:
901         /* set RS on last packet in the burst */
902         if (txp != NULL)
903                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
904
905         rte_wmb();
906
907         /*
908          * Set the Transmit Descriptor Tail (TDT)
909          */
910         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
911                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
912                    (unsigned) tx_id, (unsigned) nb_tx);
913         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
914         txq->tx_tail = tx_id;
915
916         return nb_tx;
917 }
918
919 /*********************************************************************
920  *
921  *  TX prep functions
922  *
923  **********************************************************************/
924 uint16_t
925 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
926 {
927         int i, ret;
928         uint64_t ol_flags;
929         struct rte_mbuf *m;
930         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
931
932         for (i = 0; i < nb_pkts; i++) {
933                 m = tx_pkts[i];
934                 ol_flags = m->ol_flags;
935
936                 /**
937                  * Check if packet meets requirements for number of segments
938                  *
939                  * NOTE: for ixgbe the limit is always (40 - WTHRESH) descriptors,
940                  *       for both TSO and non-TSO packets
941                  */
942
943                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
944                         rte_errno = EINVAL;
945                         return i;
946                 }
947
948                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
949                         rte_errno = ENOTSUP;
950                         return i;
951                 }
952
953 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
954                 ret = rte_validate_tx_offload(m);
955                 if (ret != 0) {
956                         rte_errno = -ret;
957                         return i;
958                 }
959 #endif
960                 ret = rte_net_intel_cksum_prepare(m);
961                 if (ret != 0) {
962                         rte_errno = -ret;
963                         return i;
964                 }
965         }
966
967         return i;
968 }
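/*
 * Illustrative sketch, not part of the driver: how an application would pair
 * rte_eth_tx_prepare(), which ends up calling ixgbe_prep_pkts() above, with
 * rte_eth_tx_burst(). The helper name and port/queue ids are assumptions.
 */
static inline uint16_t
example_prepare_and_send(uint8_t port_id, uint16_t queue_id,
                         struct rte_mbuf **pkts, uint16_t nb_pkts)
{
        /* validate offload requests and fix up headers where needed */
        uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id,
                                              pkts, nb_pkts);

        /*
         * If nb_prep < nb_pkts, pkts[nb_prep] was rejected and rte_errno
         * says why; only the validated prefix is handed to the NIC.
         */
        return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}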
969
970 /*********************************************************************
971  *
972  *  RX functions
973  *
974  **********************************************************************/
975
976 #define IXGBE_PACKET_TYPE_ETHER                         0X00
977 #define IXGBE_PACKET_TYPE_IPV4                          0X01
978 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
979 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
980 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
981 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
982 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
983 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
984 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
985 #define IXGBE_PACKET_TYPE_IPV6                          0X04
986 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
987 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
988 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
989 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
990 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
991 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
992 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
993 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
994 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
997 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
998 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1001 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1002 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1003 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1004 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1005 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1006 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1007 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1008 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1009
1010 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1027 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1028 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1029 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1030 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1031 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1032 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1033
1034 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1051 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1052 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1053 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1054 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1055 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1056 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1057
1058 #define IXGBE_PACKET_TYPE_MAX               0X80
1059 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1060 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1061
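/*
 * Illustrative sketch, not part of the driver: how an application might test
 * the packet_type that ixgbe_rxd_pkt_info_to_pkt_type() below stores into
 * each received mbuf. The helper name is made up for the example.
 */
static inline int
example_is_plain_ipv4_tcp(const struct rte_mbuf *m)
{
        /* match only non-tunneled IPv4/TCP as classified by the RX path */
        return (m->packet_type & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4 &&
               (m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP;
}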
1062 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1063 static inline uint32_t
1064 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1065 {
1066         /**
1067          * Use two different tables, one for normal packets and one for
1068          * tunneled packets, to save space.
1069          */
1070         static const uint32_t
1071                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1072                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1073                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4,
1075                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1077                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1085                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1089                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6,
1091                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1093                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1095                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT,
1099                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1101                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1103                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1111                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1114                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1126                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1135                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1145                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1146                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1147                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1148                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1151                         RTE_PTYPE_L2_ETHER |
1152                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1154         };
1155
1156         static const uint32_t
1157                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1158                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1176                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1178                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1179                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1180                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1181                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1182                         RTE_PTYPE_INNER_L4_TCP,
1183                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1184                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1185                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1186                         RTE_PTYPE_INNER_L4_TCP,
1187                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1193                         RTE_PTYPE_INNER_L4_TCP,
1194                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1195                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1196                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1197                         RTE_PTYPE_INNER_L3_IPV4,
1198                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1199                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1201                         RTE_PTYPE_INNER_L4_UDP,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1205                         RTE_PTYPE_INNER_L4_UDP,
1206                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1207                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1209                         RTE_PTYPE_INNER_L4_SCTP,
1210                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1211                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1216                         RTE_PTYPE_INNER_L4_UDP,
1217                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1220                         RTE_PTYPE_INNER_L4_SCTP,
1221                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1224                         RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1228                         RTE_PTYPE_INNER_L4_SCTP,
1229                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1231                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1232                         RTE_PTYPE_INNER_L4_SCTP,
1233                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1236                         RTE_PTYPE_INNER_L4_TCP,
1237                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1240                         RTE_PTYPE_INNER_L4_UDP,
1241
1242                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1244                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV4,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1250                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1251                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1252                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV6,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4,
1269                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1270                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1271                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1272                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1273                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1274                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1277                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1278                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                         RTE_PTYPE_INNER_L3_IPV4,
1281                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1282                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1285                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1286                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1287                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1288                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1289                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1290                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1293                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1294                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1297                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1298                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1301                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1302                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                         RTE_PTYPE_INNER_L3_IPV4,
1305                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1306                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1309                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1310                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1313                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1314                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1318                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1321                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1322                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1325                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1326                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1329                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1330                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1333         };
1334
1335         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1336                 return RTE_PTYPE_UNKNOWN;
1337
1338         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1339
1340         /* For tunnel packet */
1341         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1342                 /* Remove the tunnel bit to save space. */
1343                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1344                 return ptype_table_tn[pkt_info];
1345         }
1346
1347         /**
1348          * For x550, if it's not a tunnel packet,
1349          * the tunnel type bit should be set to 0.
1350          * Reuse 82599's mask.
1351          */
1352         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1353
1354         return ptype_table[pkt_info];
1355 }
1356
1357 static inline uint64_t
1358 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1359 {
1360         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1361                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1362                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1363                 PKT_RX_RSS_HASH, 0, 0, 0,
1364                 0, 0, 0,  PKT_RX_FDIR,
1365         };
1366 #ifdef RTE_LIBRTE_IEEE1588
1367         static uint64_t ip_pkt_etqf_map[8] = {
1368                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1369                 0, 0, 0, 0,
1370         };
1371
1372         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1373                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1374                                 ip_rss_types_map[pkt_info & 0XF];
1375         else
1376                 return ip_rss_types_map[pkt_info & 0XF];
1377 #else
1378         return ip_rss_types_map[pkt_info & 0XF];
1379 #endif
1380 }
1381
1382 static inline uint64_t
1383 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1384 {
1385         uint64_t pkt_flags;
1386
1387         /*
1388          * Check only whether a VLAN is present.
1389          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1390          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1391          */
1392         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1393
1394 #ifdef RTE_LIBRTE_IEEE1588
1395         if (rx_status & IXGBE_RXD_STAT_TMST)
1396                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1397 #endif
1398         return pkt_flags;
1399 }
1400
1401 static inline uint64_t
1402 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1403 {
1404         uint64_t pkt_flags;
1405
1406         /*
1407          * Bit 31: IPE, IPv4 checksum error
1408          * Bit 30: L4I, L4 integrity error
1409          */
1410         static uint64_t error_to_pkt_flags_map[4] = {
1411                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1412                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1413                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1414                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1415         };
1416         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1417                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1418
1419         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1420             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1421                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1422         }
1423
1424         return pkt_flags;
1425 }
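
/*
 * Usage sketch (an illustration, not part of this driver): the flags built
 * above end up in mbuf->ol_flags, so a caller of rte_eth_rx_burst() can test
 * them directly; handle_bad_csum() is a hypothetical application helper.
 *
 *     if (mb->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD))
 *             handle_bad_csum(mb);
 */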
1426
1427 /*
1428  * LOOK_AHEAD defines how many desc statuses to check beyond the
1429  * current descriptor.
1430  * It must be a compile-time constant (#define) for optimal performance.
1431  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1432  * function only works with LOOK_AHEAD=8.
1433  */
1434 #define LOOK_AHEAD 8
1435 #if (LOOK_AHEAD != 8)
1436 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1437 #endif
1438 static inline int
1439 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1440 {
1441         volatile union ixgbe_adv_rx_desc *rxdp;
1442         struct ixgbe_rx_entry *rxep;
1443         struct rte_mbuf *mb;
1444         uint16_t pkt_len;
1445         uint64_t pkt_flags;
1446         int nb_dd;
1447         uint32_t s[LOOK_AHEAD];
1448         uint32_t pkt_info[LOOK_AHEAD];
1449         int i, j, nb_rx = 0;
1450         uint32_t status;
1451         uint64_t vlan_flags = rxq->vlan_flags;
1452
1453         /* get references to current descriptor and S/W ring entry */
1454         rxdp = &rxq->rx_ring[rxq->rx_tail];
1455         rxep = &rxq->sw_ring[rxq->rx_tail];
1456
1457         status = rxdp->wb.upper.status_error;
1458         /* check to make sure there is at least 1 packet to receive */
1459         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1460                 return 0;
1461
1462         /*
1463          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1464          * reference packets that are ready to be received.
1465          */
1466         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1467              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1468                 /* Read desc statuses; the barrier below orders them before other fields */
1469                 for (j = 0; j < LOOK_AHEAD; j++)
1470                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1471
1472                 rte_smp_rmb();
1473
1474                 /* Compute how many contiguous DD status bits are set */
1475                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1476                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1477                         ;
1478
1479                 for (j = 0; j < nb_dd; j++)
1480                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1481                                                        lo_dword.data);
1482
1483                 nb_rx += nb_dd;
1484
1485                 /* Translate descriptor info to mbuf format */
1486                 for (j = 0; j < nb_dd; ++j) {
1487                         mb = rxep[j].mbuf;
1488                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1489                                   rxq->crc_len;
1490                         mb->data_len = pkt_len;
1491                         mb->pkt_len = pkt_len;
1492                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1493
1494                         /* convert descriptor fields to rte mbuf flags */
1495                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1496                                 vlan_flags);
1497                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1498                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1499                                         ((uint16_t)pkt_info[j]);
1500                         mb->ol_flags = pkt_flags;
1501                         mb->packet_type =
1502                                 ixgbe_rxd_pkt_info_to_pkt_type
1503                                         (pkt_info[j], rxq->pkt_type_mask);
1504
1505                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1506                                 mb->hash.rss = rte_le_to_cpu_32(
1507                                     rxdp[j].wb.lower.hi_dword.rss);
1508                         else if (pkt_flags & PKT_RX_FDIR) {
1509                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1510                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1511                                     IXGBE_ATR_HASH_MASK;
1512                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1513                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1514                         }
1515                 }
1516
1517                 /* Move mbuf pointers from the S/W ring to the stage */
1518                 for (j = 0; j < LOOK_AHEAD; ++j) {
1519                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1520                 }
1521
1522                 /* stop if all requested packets could not be received */
1523                 if (nb_dd != LOOK_AHEAD)
1524                         break;
1525         }
1526
1527         /* clear software ring entries so we can cleanup correctly */
1528         for (i = 0; i < nb_rx; ++i) {
1529                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1530         }
1531
1532
1533         return nb_rx;
1534 }
1535
1536 static inline int
1537 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1538 {
1539         volatile union ixgbe_adv_rx_desc *rxdp;
1540         struct ixgbe_rx_entry *rxep;
1541         struct rte_mbuf *mb;
1542         uint16_t alloc_idx;
1543         __le64 dma_addr;
1544         int diag, i;
1545
1546         /* allocate buffers in bulk directly into the S/W ring */
1547         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1548         rxep = &rxq->sw_ring[alloc_idx];
1549         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1550                                     rxq->rx_free_thresh);
1551         if (unlikely(diag != 0))
1552                 return -ENOMEM;
1553
1554         rxdp = &rxq->rx_ring[alloc_idx];
1555         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1556                 /* populate the static rte mbuf fields */
1557                 mb = rxep[i].mbuf;
1558                 if (reset_mbuf) {
1559                         mb->next = NULL;
1560                         mb->nb_segs = 1;
1561                         mb->port = rxq->port_id;
1562                 }
1563
1564                 rte_mbuf_refcnt_set(mb, 1);
1565                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1566
1567                 /* populate the descriptors */
1568                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1569                 rxdp[i].read.hdr_addr = 0;
1570                 rxdp[i].read.pkt_addr = dma_addr;
1571         }
1572
1573         /* update state of internal queue structure */
1574         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1575         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1576                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1577
1578         /* no errors */
1579         return 0;
1580 }
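
/*
 * Worked example of the trigger arithmetic above (a sketch assuming a
 * 128-descriptor ring, rx_free_thresh = 32 and the usual reset value of
 * rx_free_thresh - 1): rx_free_trigger moves 31 -> 63 -> 95 -> 127 on
 * successive refills, each refill repopulating the 32 entries that end at
 * the trigger, and then wraps back to 31 once it would reach nb_rx_desc.
 */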
1581
1582 static inline uint16_t
1583 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1584                          uint16_t nb_pkts)
1585 {
1586         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1587         int i;
1588
1589         /* how many packets are ready to return? */
1590         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1591
1592         /* copy mbuf pointers to the application's packet list */
1593         for (i = 0; i < nb_pkts; ++i)
1594                 rx_pkts[i] = stage[i];
1595
1596         /* update internal queue state */
1597         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1598         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1599
1600         return nb_pkts;
1601 }
1602
1603 static inline uint16_t
1604 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1605              uint16_t nb_pkts)
1606 {
1607         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1608         uint16_t nb_rx = 0;
1609
1610         /* Any previously recv'd pkts will be returned from the Rx stage */
1611         if (rxq->rx_nb_avail)
1612                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1613
1614         /* Scan the H/W ring for packets to receive */
1615         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1616
1617         /* update internal queue state */
1618         rxq->rx_next_avail = 0;
1619         rxq->rx_nb_avail = nb_rx;
1620         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1621
1622         /* if required, allocate new buffers to replenish descriptors */
1623         if (rxq->rx_tail > rxq->rx_free_trigger) {
1624                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1625
1626                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1627                         int i, j;
1628
1629                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1630                                    "queue_id=%u", (unsigned) rxq->port_id,
1631                                    (unsigned) rxq->queue_id);
1632
1633                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1634                                 rxq->rx_free_thresh;
1635
1636                         /*
1637                          * Need to rewind any previous receives if we cannot
1638                          * allocate new buffers to replenish the old ones.
1639                          */
1640                         rxq->rx_nb_avail = 0;
1641                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1642                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1643                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1644
1645                         return 0;
1646                 }
1647
1648                 /* update tail pointer */
1649                 rte_wmb();
1650                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1651                                             cur_free_trigger);
1652         }
1653
1654         if (rxq->rx_tail >= rxq->nb_rx_desc)
1655                 rxq->rx_tail = 0;
1656
1657         /* received any packets this loop? */
1658         if (rxq->rx_nb_avail)
1659                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1660
1661         return 0;
1662 }
1663
1664 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1665 uint16_t
1666 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1667                            uint16_t nb_pkts)
1668 {
1669         uint16_t nb_rx;
1670
1671         if (unlikely(nb_pkts == 0))
1672                 return 0;
1673
1674         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1675                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1676
1677         /* request is relatively large, chunk it up */
1678         nb_rx = 0;
1679         while (nb_pkts) {
1680                 uint16_t ret, n;
1681
1682                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1683                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1684                 nb_rx = (uint16_t)(nb_rx + ret);
1685                 nb_pkts = (uint16_t)(nb_pkts - ret);
1686                 if (ret < n)
1687                         break;
1688         }
1689
1690         return nb_rx;
1691 }
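
/*
 * Minimal caller sketch (assumed application code, not taken from this file):
 * once installed as the device rx_pkt_burst handler, the function above is
 * reached through rte_eth_rx_burst(); bursts larger than
 * RTE_PMD_IXGBE_RX_MAX_BURST are transparently split by the loop above.
 *
 *     struct rte_mbuf *pkts[64];
 *     uint16_t i, nb;
 *
 *     nb = rte_eth_rx_burst(port_id, queue_id, pkts, 64);
 *     for (i = 0; i < nb; i++)
 *             process_pkt(pkts[i]);    // hypothetical application hook
 */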
1692
1693 uint16_t
1694 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1695                 uint16_t nb_pkts)
1696 {
1697         struct ixgbe_rx_queue *rxq;
1698         volatile union ixgbe_adv_rx_desc *rx_ring;
1699         volatile union ixgbe_adv_rx_desc *rxdp;
1700         struct ixgbe_rx_entry *sw_ring;
1701         struct ixgbe_rx_entry *rxe;
1702         struct rte_mbuf *rxm;
1703         struct rte_mbuf *nmb;
1704         union ixgbe_adv_rx_desc rxd;
1705         uint64_t dma_addr;
1706         uint32_t staterr;
1707         uint32_t pkt_info;
1708         uint16_t pkt_len;
1709         uint16_t rx_id;
1710         uint16_t nb_rx;
1711         uint16_t nb_hold;
1712         uint64_t pkt_flags;
1713         uint64_t vlan_flags;
1714
1715         nb_rx = 0;
1716         nb_hold = 0;
1717         rxq = rx_queue;
1718         rx_id = rxq->rx_tail;
1719         rx_ring = rxq->rx_ring;
1720         sw_ring = rxq->sw_ring;
1721         vlan_flags = rxq->vlan_flags;
1722         while (nb_rx < nb_pkts) {
1723                 /*
1724                  * The order of operations here is important as the DD status
1725                  * bit must not be read after any other descriptor fields.
1726                  * rx_ring and rxdp are pointing to volatile data so the order
1727                  * of accesses cannot be reordered by the compiler. If they were
1728                  * not volatile, they could be reordered which could lead to
1729                  * using invalid descriptor fields when read from rxd.
1730                  */
1731                 rxdp = &rx_ring[rx_id];
1732                 staterr = rxdp->wb.upper.status_error;
1733                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1734                         break;
1735                 rxd = *rxdp;
1736
1737                 /*
1738                  * End of packet.
1739                  *
1740                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1741                  * is likely to be invalid and to be dropped by the various
1742                  * validation checks performed by the network stack.
1743                  *
1744                  * Allocate a new mbuf to replenish the RX ring descriptor.
1745                  * If the allocation fails:
1746                  *    - arrange for that RX descriptor to be the first one
1747                  *      being parsed the next time the receive function is
1748                  *      invoked [on the same queue].
1749                  *
1750                  *    - Stop parsing the RX ring and return immediately.
1751                  *
1752                  * This policy does not drop the packet received in the RX
1753                  * descriptor for which the allocation of a new mbuf failed.
1754                  * Thus, it allows that packet to be retrieved later once
1755                  * mbufs have been freed in the meantime.
1756                  * As a side effect, holding RX descriptors instead of
1757                  * systematically giving them back to the NIC may lead to
1758                  * RX ring exhaustion situations.
1759                  * However, the NIC can gracefully prevent such situations
1760                  * from happening by sending specific "back-pressure" flow control
1761                  * frames to its peer(s).
1762                  */
1763                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1764                            "ext_err_stat=0x%08x pkt_len=%u",
1765                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1766                            (unsigned) rx_id, (unsigned) staterr,
1767                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1768
1769                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1770                 if (nmb == NULL) {
1771                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1772                                    "queue_id=%u", (unsigned) rxq->port_id,
1773                                    (unsigned) rxq->queue_id);
1774                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1775                         break;
1776                 }
1777
1778                 nb_hold++;
1779                 rxe = &sw_ring[rx_id];
1780                 rx_id++;
1781                 if (rx_id == rxq->nb_rx_desc)
1782                         rx_id = 0;
1783
1784                 /* Prefetch next mbuf while processing current one. */
1785                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1786
1787                 /*
1788                  * When next RX descriptor is on a cache-line boundary,
1789                  * prefetch the next 4 RX descriptors and the next 8 pointers
1790                  * to mbufs.
1791                  */
1792                 if ((rx_id & 0x3) == 0) {
1793                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1794                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1795                 }
1796
1797                 rxm = rxe->mbuf;
1798                 rxe->mbuf = nmb;
1799                 dma_addr =
1800                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1801                 rxdp->read.hdr_addr = 0;
1802                 rxdp->read.pkt_addr = dma_addr;
1803
1804                 /*
1805                  * Initialize the returned mbuf.
1806                  * 1) setup generic mbuf fields:
1807                  *    - number of segments,
1808                  *    - next segment,
1809                  *    - packet length,
1810                  *    - RX port identifier.
1811                  * 2) integrate hardware offload data, if any:
1812                  *    - RSS flag & hash,
1813                  *    - IP checksum flag,
1814                  *    - VLAN TCI, if any,
1815                  *    - error flags.
1816                  */
1817                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1818                                       rxq->crc_len);
1819                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1820                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1821                 rxm->nb_segs = 1;
1822                 rxm->next = NULL;
1823                 rxm->pkt_len = pkt_len;
1824                 rxm->data_len = pkt_len;
1825                 rxm->port = rxq->port_id;
1826
1827                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1828                 /* Only valid if PKT_RX_VLAN_PKT is set in pkt_flags */
1829                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1830
1831                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1832                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1833                 pkt_flags = pkt_flags |
1834                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1835                 rxm->ol_flags = pkt_flags;
1836                 rxm->packet_type =
1837                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1838                                                        rxq->pkt_type_mask);
1839
1840                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1841                         rxm->hash.rss = rte_le_to_cpu_32(
1842                                                 rxd.wb.lower.hi_dword.rss);
1843                 else if (pkt_flags & PKT_RX_FDIR) {
1844                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1845                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1846                                         IXGBE_ATR_HASH_MASK;
1847                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1848                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1849                 }
1850                 /*
1851                  * Store the mbuf address into the next entry of the array
1852                  * of returned packets.
1853                  */
1854                 rx_pkts[nb_rx++] = rxm;
1855         }
1856         rxq->rx_tail = rx_id;
1857
1858         /*
1859          * If the number of free RX descriptors is greater than the RX free
1860          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1861          * register.
1862          * Update the RDT with the value of the last processed RX descriptor
1863          * minus 1, to guarantee that the RDT register is never equal to the
1864          * RDH register, which creates a "full" ring situation from the
1865          * hardware point of view...
1866          */
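        /*
         * Worked example of the write-back policy described above (a sketch,
         * assuming rx_free_thresh = 32): the tail register is only touched
         * once more than 32 descriptors have been processed since the last
         * write, and it is set to rx_id - 1 (wrapping to nb_rx_desc - 1 when
         * rx_id is 0) so that RDT never catches up with RDH.
         */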
1867         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1868         if (nb_hold > rxq->rx_free_thresh) {
1869                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1870                            "nb_hold=%u nb_rx=%u",
1871                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1872                            (unsigned) rx_id, (unsigned) nb_hold,
1873                            (unsigned) nb_rx);
1874                 rx_id = (uint16_t) ((rx_id == 0) ?
1875                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1876                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1877                 nb_hold = 0;
1878         }
1879         rxq->nb_rx_hold = nb_hold;
1880         return nb_rx;
1881 }
1882
1883 /**
1884  * Detect an RSC descriptor.
1885  */
1886 static inline uint32_t
1887 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1888 {
1889         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1890                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1891 }
1892
1893 /**
1894  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1895  *
1896  * Fill the following info in the HEAD buffer of the Rx cluster:
1897  *    - RX port identifier
1898  *    - hardware offload data, if any:
1899  *      - RSS flag & hash
1900  *      - IP checksum flag
1901  *      - VLAN TCI, if any
1902  *      - error flags
1903  * @head HEAD of the packet cluster
1904  * @desc HW descriptor to get data from
1905  * @rxq Pointer to the Rx queue
1906  */
1907 static inline void
1908 ixgbe_fill_cluster_head_buf(
1909         struct rte_mbuf *head,
1910         union ixgbe_adv_rx_desc *desc,
1911         struct ixgbe_rx_queue *rxq,
1912         uint32_t staterr)
1913 {
1914         uint32_t pkt_info;
1915         uint64_t pkt_flags;
1916
1917         head->port = rxq->port_id;
1918
1919         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1920          * set in the pkt_flags field.
1921          */
1922         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1923         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1924         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1925         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1926         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1927         head->ol_flags = pkt_flags;
1928         head->packet_type =
1929                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1930
1931         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1932                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1933         else if (pkt_flags & PKT_RX_FDIR) {
1934                 head->hash.fdir.hash =
1935                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1936                                                           & IXGBE_ATR_HASH_MASK;
1937                 head->hash.fdir.id =
1938                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1939         }
1940 }
1941
1942 /**
1943  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1944  *
1945  * @rx_queue Rx queue handle
1946  * @rx_pkts table of received packets
1947  * @nb_pkts size of rx_pkts table
1948  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1949  *
1950  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1951  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1952  *
1953  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1954  * 1) When non-EOP RSC completion arrives:
1955  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1956  *       segment's data length.
1957  *    b) Set the "next" pointer of the current segment to point to the segment
1958  *       at the NEXTP index.
1959  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1960  *       in the sw_rsc_ring.
1961  * 2) When EOP arrives we just update the cluster's total length and offload
1962  *    flags and deliver the cluster up to the upper layers. In our case - put it
1963  *    in the rx_pkts table.
1964  *
1965  * Returns the number of received packets/clusters (according to the "bulk
1966  * receive" interface).
1967  */
1968 static inline uint16_t
1969 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1970                     bool bulk_alloc)
1971 {
1972         struct ixgbe_rx_queue *rxq = rx_queue;
1973         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1974         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1975         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1976         uint16_t rx_id = rxq->rx_tail;
1977         uint16_t nb_rx = 0;
1978         uint16_t nb_hold = rxq->nb_rx_hold;
1979         uint16_t prev_id = rxq->rx_tail;
1980
1981         while (nb_rx < nb_pkts) {
1982                 bool eop;
1983                 struct ixgbe_rx_entry *rxe;
1984                 struct ixgbe_scattered_rx_entry *sc_entry;
1985                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1986                 struct ixgbe_rx_entry *next_rxe = NULL;
1987                 struct rte_mbuf *first_seg;
1988                 struct rte_mbuf *rxm;
1989                 struct rte_mbuf *nmb;
1990                 union ixgbe_adv_rx_desc rxd;
1991                 uint16_t data_len;
1992                 uint16_t next_id;
1993                 volatile union ixgbe_adv_rx_desc *rxdp;
1994                 uint32_t staterr;
1995
1996 next_desc:
1997                 /*
1998                  * The code in this whole file uses the volatile pointer to
1999                  * ensure the read ordering of the status and the rest of the
2000                  * descriptor fields (on the compiler level only!!!). This is so
2001                  * UGLY - why not just use the compiler barrier instead? DPDK
2002                  * even has the rte_compiler_barrier() for that.
2003                  *
2004                  * But most importantly this is just wrong because this doesn't
2005                  * ensure memory ordering in a general case at all. For
2006                  * instance, DPDK is supposed to work on Power CPUs where
2007                  * compiler barrier may just not be enough!
2008                  *
2009                  * I tried to write only this function properly to have a
2010                  * starting point (as a part of an LRO/RSC series) but the
2011                  * compiler cursed at me when I tried to cast away the
2012                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2013                  * keeping it the way it is for now.
2014                  *
2015                  * The code in this file is broken in so many other places and
2016                  * will just not work on a big endian CPU anyway therefore the
2017                  * lines below will have to be revisited together with the rest
2018                  * of the ixgbe PMD.
2019                  *
2020                  * TODO:
2021                  *    - Get rid of "volatile" crap and let the compiler do its
2022                  *      job.
2023                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2024                  *      memory ordering below.
2025                  */
2026                 rxdp = &rx_ring[rx_id];
2027                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2028
2029                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2030                         break;
2031
2032                 rxd = *rxdp;
2033
2034                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2035                                   "staterr=0x%x data_len=%u",
2036                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2037                            rte_le_to_cpu_16(rxd.wb.upper.length));
2038
2039                 if (!bulk_alloc) {
2040                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2041                         if (nmb == NULL) {
2042                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2043                                                   "port_id=%u queue_id=%u",
2044                                            rxq->port_id, rxq->queue_id);
2045
2046                                 rte_eth_devices[rxq->port_id].data->
2047                                                         rx_mbuf_alloc_failed++;
2048                                 break;
2049                         }
2050                 } else if (nb_hold > rxq->rx_free_thresh) {
2051                         uint16_t next_rdt = rxq->rx_free_trigger;
2052
2053                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2054                                 rte_wmb();
2055                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2056                                                             next_rdt);
2057                                 nb_hold -= rxq->rx_free_thresh;
2058                         } else {
2059                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2060                                                   "port_id=%u queue_id=%u",
2061                                            rxq->port_id, rxq->queue_id);
2062
2063                                 rte_eth_devices[rxq->port_id].data->
2064                                                         rx_mbuf_alloc_failed++;
2065                                 break;
2066                         }
2067                 }
2068
2069                 nb_hold++;
2070                 rxe = &sw_ring[rx_id];
2071                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2072
2073                 next_id = rx_id + 1;
2074                 if (next_id == rxq->nb_rx_desc)
2075                         next_id = 0;
2076
2077                 /* Prefetch next mbuf while processing current one. */
2078                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2079
2080                 /*
2081                  * When next RX descriptor is on a cache-line boundary,
2082                  * prefetch the next 4 RX descriptors and the next 4 pointers
2083                  * to mbufs.
2084                  */
2085                 if ((next_id & 0x3) == 0) {
2086                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2087                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2088                 }
2089
2090                 rxm = rxe->mbuf;
2091
2092                 if (!bulk_alloc) {
2093                         __le64 dma =
2094                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2095                         /*
2096                          * Update RX descriptor with the physical address of the
2097                          * new data buffer of the new allocated mbuf.
2098                          */
2099                         rxe->mbuf = nmb;
2100
2101                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2102                         rxdp->read.hdr_addr = 0;
2103                         rxdp->read.pkt_addr = dma;
2104                 } else
2105                         rxe->mbuf = NULL;
2106
2107                 /*
2108                  * Set data length & data buffer address of mbuf.
2109                  */
2110                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2111                 rxm->data_len = data_len;
2112
2113                 if (!eop) {
2114                         uint16_t nextp_id;
2115                         /*
2116                          * Get next descriptor index:
2117                          *  - For RSC it's in the NEXTP field.
2118                          *  - For a scattered packet - it's just a following
2119                          *    descriptor.
2120                          */
2121                         if (ixgbe_rsc_count(&rxd))
2122                                 nextp_id =
2123                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2124                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2125                         else
2126                                 nextp_id = next_id;
2127
2128                         next_sc_entry = &sw_sc_ring[nextp_id];
2129                         next_rxe = &sw_ring[nextp_id];
2130                         rte_ixgbe_prefetch(next_rxe);
2131                 }
2132
2133                 sc_entry = &sw_sc_ring[rx_id];
2134                 first_seg = sc_entry->fbuf;
2135                 sc_entry->fbuf = NULL;
2136
2137                 /*
2138                  * If this is the first buffer of the received packet,
2139                  * set the pointer to the first mbuf of the packet and
2140                  * initialize its context.
2141                  * Otherwise, update the total length and the number of segments
2142                  * of the current scattered packet, and update the pointer to
2143                  * the last mbuf of the current packet.
2144                  */
2145                 if (first_seg == NULL) {
2146                         first_seg = rxm;
2147                         first_seg->pkt_len = data_len;
2148                         first_seg->nb_segs = 1;
2149                 } else {
2150                         first_seg->pkt_len += data_len;
2151                         first_seg->nb_segs++;
2152                 }
2153
2154                 prev_id = rx_id;
2155                 rx_id = next_id;
2156
2157                 /*
2158                  * If this is not the last buffer of the received packet, update
2159                  * the pointer to the first mbuf at the NEXTP entry in the
2160                  * sw_sc_ring and continue to parse the RX ring.
2161                  */
2162                 if (!eop && next_rxe) {
2163                         rxm->next = next_rxe->mbuf;
2164                         next_sc_entry->fbuf = first_seg;
2165                         goto next_desc;
2166                 }
2167
2168                 /*
2169                  * This is the last buffer of the received packet - return
2170                  * the current cluster to the user.
2171                  */
2172                 rxm->next = NULL;
2173
2174                 /* Initialize the first mbuf of the returned packet */
2175                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2176
2177                 /*
2178                  * Deal with the case when HW CRC stripping is disabled.
2179                  * That can't happen when LRO is enabled, but could still
2180                  * happen in scattered RX mode.
2181                  */
2182                 first_seg->pkt_len -= rxq->crc_len;
2183                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2184                         struct rte_mbuf *lp;
2185
2186                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2187                                 ;
2188
2189                         first_seg->nb_segs--;
2190                         lp->data_len -= rxq->crc_len - rxm->data_len;
2191                         lp->next = NULL;
2192                         rte_pktmbuf_free_seg(rxm);
2193                 } else
2194                         rxm->data_len -= rxq->crc_len;
2195
2196                 /* Prefetch data of first segment, if configured to do so. */
2197                 rte_packet_prefetch((char *)first_seg->buf_addr +
2198                         first_seg->data_off);
2199
2200                 /*
2201                  * Store the mbuf address into the next entry of the array
2202                  * of returned packets.
2203                  */
2204                 rx_pkts[nb_rx++] = first_seg;
2205         }
2206
2207         /*
2208          * Record index of the next RX descriptor to probe.
2209          */
2210         rxq->rx_tail = rx_id;
2211
2212         /*
2213          * If the number of free RX descriptors is greater than the RX free
2214          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2215          * register.
2216          * Update the RDT with the value of the last processed RX descriptor
2217          * minus 1, to guarantee that the RDT register is never equal to the
2218          * RDH register, which creates a "full" ring situation from the
2219          * hardware point of view...
2220          */
2221         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2222                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2223                            "nb_hold=%u nb_rx=%u",
2224                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2225
2226                 rte_wmb();
2227                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2228                 nb_hold = 0;
2229         }
2230
2231         rxq->nb_rx_hold = nb_hold;
2232         return nb_rx;
2233 }
2234
2235 uint16_t
2236 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2237                                  uint16_t nb_pkts)
2238 {
2239         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2240 }
2241
2242 uint16_t
2243 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2244                                uint16_t nb_pkts)
2245 {
2246         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2247 }
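
/*
 * Configuration sketch (hedged; field names follow the rte_eth_rxmode layout
 * of this DPDK generation): the LRO receive handlers above are selected when
 * the port is configured with RSC/LRO enabled, and they also serve scattered
 * Rx. A typical application configuration looks like:
 *
 *     struct rte_eth_conf conf;
 *
 *     memset(&conf, 0, sizeof(conf));
 *     conf.rxmode.enable_lro = 1;      // request RSC aggregation
 *     conf.rxmode.hw_strip_crc = 1;    // LRO relies on HW CRC stripping
 *     rte_eth_dev_configure(port_id, 1, 1, &conf);
 */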
2248
2249 /*********************************************************************
2250  *
2251  *  Queue management functions
2252  *
2253  **********************************************************************/
2254
2255 static void __attribute__((cold))
2256 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2257 {
2258         unsigned i;
2259
2260         if (txq->sw_ring != NULL) {
2261                 for (i = 0; i < txq->nb_tx_desc; i++) {
2262                         if (txq->sw_ring[i].mbuf != NULL) {
2263                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2264                                 txq->sw_ring[i].mbuf = NULL;
2265                         }
2266                 }
2267         }
2268 }
2269
2270 static void __attribute__((cold))
2271 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2272 {
2273         if (txq != NULL &&
2274             txq->sw_ring != NULL)
2275                 rte_free(txq->sw_ring);
2276 }
2277
2278 static void __attribute__((cold))
2279 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2280 {
2281         if (txq != NULL && txq->ops != NULL) {
2282                 txq->ops->release_mbufs(txq);
2283                 txq->ops->free_swring(txq);
2284                 rte_free(txq);
2285         }
2286 }
2287
2288 void __attribute__((cold))
2289 ixgbe_dev_tx_queue_release(void *txq)
2290 {
2291         ixgbe_tx_queue_release(txq);
2292 }
2293
2294 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2295 static void __attribute__((cold))
2296 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2297 {
2298         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2299         struct ixgbe_tx_entry *txe = txq->sw_ring;
2300         uint16_t prev, i;
2301
2302         /* Zero out HW ring memory */
2303         for (i = 0; i < txq->nb_tx_desc; i++) {
2304                 txq->tx_ring[i] = zeroed_desc;
2305         }
2306
2307         /* Initialize SW ring entries */
2308         prev = (uint16_t) (txq->nb_tx_desc - 1);
2309         for (i = 0; i < txq->nb_tx_desc; i++) {
2310                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2311
2312                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2313                 txe[i].mbuf = NULL;
2314                 txe[i].last_id = i;
2315                 txe[prev].next_id = i;
2316                 prev = i;
2317         }
2318
2319         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2320         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2321
2322         txq->tx_tail = 0;
2323         txq->nb_tx_used = 0;
2324         /*
2325          * Always allow 1 descriptor to be un-allocated to avoid
2326          * a H/W race condition
2327          */
2328         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2329         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2330         txq->ctx_curr = 0;
2331         memset((void *)&txq->ctx_cache, 0,
2332                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2333 }
2334
2335 static const struct ixgbe_txq_ops def_txq_ops = {
2336         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2337         .free_swring = ixgbe_tx_free_swring,
2338         .reset = ixgbe_reset_tx_queue,
2339 };
2340
2341 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2342  * the queue parameters. Used in tx_queue_setup by the primary process and
2343  * then in dev_init by a secondary process when attaching to an existing ethdev.
2344  */
2345 void __attribute__((cold))
2346 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2347 {
2348         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2349         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2350                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2351                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2352                 dev->tx_pkt_prepare = NULL;
2353 #ifdef RTE_IXGBE_INC_VECTOR
2354                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2355                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2356                                         ixgbe_txq_vec_setup(txq) == 0)) {
2357                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2358                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2359                 } else
2360 #endif
2361                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2362         } else {
2363                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2364                 PMD_INIT_LOG(DEBUG,
2365                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2366                                 (unsigned long)txq->txq_flags,
2367                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2368                 PMD_INIT_LOG(DEBUG,
2369                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2370                                 (unsigned long)txq->tx_rs_thresh,
2371                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2372                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2373                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2374         }
2375 }
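
/*
 * Selection sketch (assumed application code; the exact flag values live in
 * rte_ethdev.h and ixgbe_rxtx.h): to land on the simple/vector Tx path chosen
 * above, a queue is typically set up with no offloads, no multi-segment
 * support and a large enough tx_rs_thresh, e.g.
 *
 *     struct rte_eth_dev_info info;
 *     struct rte_eth_txconf txconf;
 *
 *     rte_eth_dev_info_get(port_id, &info);
 *     txconf = info.default_txconf;
 *     txconf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
 *                        ETH_TXQ_FLAGS_NOOFFLOADS;
 *     rte_eth_tx_queue_setup(port_id, 0, nb_desc,
 *                            rte_eth_dev_socket_id(port_id), &txconf);
 */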
2376
2377 int __attribute__((cold))
2378 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2379                          uint16_t queue_idx,
2380                          uint16_t nb_desc,
2381                          unsigned int socket_id,
2382                          const struct rte_eth_txconf *tx_conf)
2383 {
2384         const struct rte_memzone *tz;
2385         struct ixgbe_tx_queue *txq;
2386         struct ixgbe_hw     *hw;
2387         uint16_t tx_rs_thresh, tx_free_thresh;
2388
2389         PMD_INIT_FUNC_TRACE();
2390         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2391
2392         /*
2393          * Validate number of transmit descriptors.
2394          * It must not exceed the hardware maximum and must be a multiple
2395          * of IXGBE_TXD_ALIGN.
2396          */
2397         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2398                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2399                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2400                 return -EINVAL;
2401         }
2402
2403         /*
2404          * The following two parameters control the setting of the RS bit on
2405          * transmit descriptors.
2406          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2407          * descriptors have been used.
2408          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2409          * descriptors are used or if the number of descriptors required
2410          * to transmit a packet is greater than the number of free TX
2411          * descriptors.
2412          * The following constraints must be satisfied:
2413          *  tx_rs_thresh must be greater than 0.
2414          *  tx_rs_thresh must be less than the size of the ring minus 2.
2415          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2416          *  tx_rs_thresh must be a divisor of the ring size.
2417          *  tx_free_thresh must be greater than 0.
2418          *  tx_free_thresh must be less than the size of the ring minus 3.
2419          * One descriptor in the TX ring is used as a sentinel to avoid a
2420          * H/W race condition, hence the maximum threshold constraints.
2421          * When set to zero use default values.
2422          */
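        /*
         * Worked example (illustrative): with nb_desc = 512, the pair
         * tx_rs_thresh = 32 and tx_free_thresh = 32 satisfies every rule
         * above: 0 < 32 <= 32 < 512 - 3, and 512 % 32 == 0.
         */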
2423         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2424                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2425         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2426                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2427         if (tx_rs_thresh >= (nb_desc - 2)) {
2428                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2429                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2430                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2431                         (int)dev->data->port_id, (int)queue_idx);
2432                 return -(EINVAL);
2433         }
2434         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2435                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2436                         "(tx_rs_thresh=%u port=%d queue=%d)",
2437                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2438                         (int)dev->data->port_id, (int)queue_idx);
2439                 return -(EINVAL);
2440         }
2441         if (tx_free_thresh >= (nb_desc - 3)) {
2442                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2443                              "number of "
2444                              "TX descriptors minus 3. (tx_free_thresh=%u "
2445                              "port=%d queue=%d)",
2446                              (unsigned int)tx_free_thresh,
2447                              (int)dev->data->port_id, (int)queue_idx);
2448                 return -(EINVAL);
2449         }
2450         if (tx_rs_thresh > tx_free_thresh) {
2451                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2452                              "tx_free_thresh. (tx_free_thresh=%u "
2453                              "tx_rs_thresh=%u port=%d queue=%d)",
2454                              (unsigned int)tx_free_thresh,
2455                              (unsigned int)tx_rs_thresh,
2456                              (int)dev->data->port_id,
2457                              (int)queue_idx);
2458                 return -(EINVAL);
2459         }
2460         if ((nb_desc % tx_rs_thresh) != 0) {
2461                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2462                              "number of TX descriptors. (tx_rs_thresh=%u "
2463                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2464                              (int)dev->data->port_id, (int)queue_idx);
2465                 return -(EINVAL);
2466         }
2467
2468         /*
2469          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2470          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2471          * by the NIC and all descriptors are written back after the NIC
2472          * accumulates WTHRESH descriptors.
2473          */
2474         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2475                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2476                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2477                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2478                              (int)dev->data->port_id, (int)queue_idx);
2479                 return -(EINVAL);
2480         }
2481
2482         /* Free memory prior to re-allocation if needed... */
2483         if (dev->data->tx_queues[queue_idx] != NULL) {
2484                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2485                 dev->data->tx_queues[queue_idx] = NULL;
2486         }
2487
2488         /* First allocate the tx queue data structure */
2489         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2490                                  RTE_CACHE_LINE_SIZE, socket_id);
2491         if (txq == NULL)
2492                 return -ENOMEM;
2493
2494         /*
2495          * Allocate TX ring hardware descriptors. A memzone large enough to
2496          * handle the maximum ring size is allocated in order to allow for
2497          * resizing in later calls to the queue setup function.
2498          */
2499         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2500                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2501                         IXGBE_ALIGN, socket_id);
2502         if (tz == NULL) {
2503                 ixgbe_tx_queue_release(txq);
2504                 return -ENOMEM;
2505         }
2506
2507         txq->nb_tx_desc = nb_desc;
2508         txq->tx_rs_thresh = tx_rs_thresh;
2509         txq->tx_free_thresh = tx_free_thresh;
2510         txq->pthresh = tx_conf->tx_thresh.pthresh;
2511         txq->hthresh = tx_conf->tx_thresh.hthresh;
2512         txq->wthresh = tx_conf->tx_thresh.wthresh;
2513         txq->queue_id = queue_idx;
2514         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2515                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2516         txq->port_id = dev->data->port_id;
2517         txq->txq_flags = tx_conf->txq_flags;
2518         txq->ops = &def_txq_ops;
2519         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2520
2521         /*
2522          * Use VFTDT as the TX tail register when the device is a virtual function
2523          */
2524         if (hw->mac.type == ixgbe_mac_82599_vf ||
2525             hw->mac.type == ixgbe_mac_X540_vf ||
2526             hw->mac.type == ixgbe_mac_X550_vf ||
2527             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2528             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2529                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2530         else
2531                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2532
2533         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2534         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2535
2536         /* Allocate software ring */
2537         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2538                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2539                                 RTE_CACHE_LINE_SIZE, socket_id);
2540         if (txq->sw_ring == NULL) {
2541                 ixgbe_tx_queue_release(txq);
2542                 return -ENOMEM;
2543         }
2544         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2545                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2546
2547         /* set up vector or scalar TX function as appropriate */
2548         ixgbe_set_tx_function(dev, txq);
2549
2550         txq->ops->reset(txq);
2551
2552         dev->data->tx_queues[queue_idx] = txq;
2553
2554
2555         return 0;
2556 }
2557
2558 /**
2559  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2560  *
2561  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2562  * in the sw_rsc_ring is not set to NULL but rather points to the next
2563  * mbuf of this RSC aggregation (that has not been completed yet and still
2564  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2565  * just free the first "nb_segs" segments of the cluster explicitly by calling
2566  * rte_pktmbuf_free_seg() on each of them.
2567  *
2568  * @m scattered cluster head
2569  */
2570 static void __attribute__((cold))
2571 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2572 {
2573         uint8_t i, nb_segs = m->nb_segs;
2574         struct rte_mbuf *next_seg;
2575
2576         for (i = 0; i < nb_segs; i++) {
2577                 next_seg = m->next;
2578                 rte_pktmbuf_free_seg(m);
2579                 m = next_seg;
2580         }
2581 }
2582
2583 static void __attribute__((cold))
2584 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2585 {
2586         unsigned i;
2587
2588 #ifdef RTE_IXGBE_INC_VECTOR
2589         /* SSE Vector driver has a different way of releasing mbufs. */
2590         if (rxq->rx_using_sse) {
2591                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2592                 return;
2593         }
2594 #endif
2595
2596         if (rxq->sw_ring != NULL) {
2597                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2598                         if (rxq->sw_ring[i].mbuf != NULL) {
2599                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2600                                 rxq->sw_ring[i].mbuf = NULL;
2601                         }
2602                 }
2603                 if (rxq->rx_nb_avail) {
2604                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2605                                 struct rte_mbuf *mb;
2606
2607                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2608                                 rte_pktmbuf_free_seg(mb);
2609                         }
2610                         rxq->rx_nb_avail = 0;
2611                 }
2612         }
2613
2614         if (rxq->sw_sc_ring)
2615                 for (i = 0; i < rxq->nb_rx_desc; i++)
2616                         if (rxq->sw_sc_ring[i].fbuf) {
2617                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2618                                 rxq->sw_sc_ring[i].fbuf = NULL;
2619                         }
2620 }
2621
2622 static void __attribute__((cold))
2623 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2624 {
2625         if (rxq != NULL) {
2626                 ixgbe_rx_queue_release_mbufs(rxq);
2627                 rte_free(rxq->sw_ring);
2628                 rte_free(rxq->sw_sc_ring);
2629                 rte_free(rxq);
2630         }
2631 }
2632
2633 void __attribute__((cold))
2634 ixgbe_dev_rx_queue_release(void *rxq)
2635 {
2636         ixgbe_rx_queue_release(rxq);
2637 }
2638
2639 /*
2640  * Check if Rx Burst Bulk Alloc function can be used.
2641  * Return
2642  *        0: the preconditions are satisfied and the bulk allocation function
2643  *           can be used.
2644  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2645  *           function must be used.
2646  */
2647 static inline int __attribute__((cold))
2648 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2649 {
2650         int ret = 0;
2651
2652         /*
2653          * Make sure the following pre-conditions are satisfied:
2654          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2655          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2656          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2657          * Scattered packets are not supported.  This should be checked
2658          * outside of this function.
2659          */
2660         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2661                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2662                              "rxq->rx_free_thresh=%d, "
2663                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2664                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2665                 ret = -EINVAL;
2666         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2667                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2668                              "rxq->rx_free_thresh=%d, "
2669                              "rxq->nb_rx_desc=%d",
2670                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2671                 ret = -EINVAL;
2672         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2673                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2674                              "rxq->nb_rx_desc=%d, "
2675                              "rxq->rx_free_thresh=%d",
2676                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2677                 ret = -EINVAL;
2678         }
2679
2680         return ret;
2681 }
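
/*
 * Worked example (illustrative): assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32,
 * nb_rx_desc = 512 with rx_free_thresh = 64 satisfies all three
 * preconditions (64 >= 32, 64 < 512, 512 % 64 == 0), while
 * rx_free_thresh = 30 would fail the first check and disable Rx bulk
 * allocation for the whole port.
 */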
2682
2683 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2684 static void __attribute__((cold))
2685 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2686 {
2687         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2688         unsigned i;
2689         uint16_t len = rxq->nb_rx_desc;
2690
2691         /*
2692          * By default, the Rx queue setup function allocates enough memory for
2693          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2694          * extra memory at the end of the descriptor ring to be zero'd out.
2695          */
2696         if (adapter->rx_bulk_alloc_allowed)
2697                 /* zero out extra memory */
2698                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2699
2700         /*
2701          * Zero out HW ring memory. Zero out extra memory at the end of
2702          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2703          * reads extra memory as zeros.
2704          */
2705         for (i = 0; i < len; i++) {
2706                 rxq->rx_ring[i] = zeroed_desc;
2707         }
2708
2709         /*
2710          * Initialize the extra software ring entries. Space for these extra
2711          * entries is always allocated.
2712          */
2713         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2714         for (i = rxq->nb_rx_desc; i < len; ++i) {
2715                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2716         }
2717
2718         rxq->rx_nb_avail = 0;
2719         rxq->rx_next_avail = 0;
2720         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2721         rxq->rx_tail = 0;
2722         rxq->nb_rx_hold = 0;
2723         rxq->pkt_first_seg = NULL;
2724         rxq->pkt_last_seg = NULL;
2725
2726 #ifdef RTE_IXGBE_INC_VECTOR
2727         rxq->rxrearm_start = 0;
2728         rxq->rxrearm_nb = 0;
2729 #endif
2730 }
2731
2732 int __attribute__((cold))
2733 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2734                          uint16_t queue_idx,
2735                          uint16_t nb_desc,
2736                          unsigned int socket_id,
2737                          const struct rte_eth_rxconf *rx_conf,
2738                          struct rte_mempool *mp)
2739 {
2740         const struct rte_memzone *rz;
2741         struct ixgbe_rx_queue *rxq;
2742         struct ixgbe_hw     *hw;
2743         uint16_t len;
2744         struct ixgbe_adapter *adapter =
2745                 (struct ixgbe_adapter *)dev->data->dev_private;
2746
2747         PMD_INIT_FUNC_TRACE();
2748         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2749
2750         /*
2751          * Validate number of receive descriptors.
2752          * It must not exceed the hardware maximum and must be a multiple
2753          * of IXGBE_RXD_ALIGN.
2754          */
2755         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2756                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2757                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2758                 return -EINVAL;
2759         }
2760
2761         /* Free memory prior to re-allocation if needed... */
2762         if (dev->data->rx_queues[queue_idx] != NULL) {
2763                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2764                 dev->data->rx_queues[queue_idx] = NULL;
2765         }
2766
2767         /* First allocate the rx queue data structure */
2768         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2769                                  RTE_CACHE_LINE_SIZE, socket_id);
2770         if (rxq == NULL)
2771                 return -ENOMEM;
2772         rxq->mb_pool = mp;
2773         rxq->nb_rx_desc = nb_desc;
2774         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2775         rxq->queue_id = queue_idx;
2776         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2777                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2778         rxq->port_id = dev->data->port_id;
2779         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2780                                                         0 : ETHER_CRC_LEN);
2781         rxq->drop_en = rx_conf->rx_drop_en;
2782         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2783
2784         /*
2785          * The packet type field in the RX descriptor is laid out differently
2786          * on different NICs: some bits are used on x550 but reserved on other
2787          * devices, so set a per-device packet type mask.
2788          */
2789         if (hw->mac.type == ixgbe_mac_X550 ||
2790             hw->mac.type == ixgbe_mac_X550EM_x ||
2791             hw->mac.type == ixgbe_mac_X550EM_a ||
2792             hw->mac.type == ixgbe_mac_X550_vf ||
2793             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2794             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2795                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2796         else
2797                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2798
2799         /*
2800          * Allocate RX ring hardware descriptors. A memzone large enough to
2801          * handle the maximum ring size is allocated in order to allow for
2802          * resizing in later calls to the queue setup function.
2803          */
2804         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2805                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2806         if (rz == NULL) {
2807                 ixgbe_rx_queue_release(rxq);
2808                 return -ENOMEM;
2809         }
2810
2811         /*
2812          * Zero init all the descriptors in the ring.
2813          */
2814         memset(rz->addr, 0, RX_RING_SZ);
2815
2816         /*
2817          * Use VFRDT/VFRDH as the tail/head registers for virtual functions
2818          */
2819         if (hw->mac.type == ixgbe_mac_82599_vf ||
2820             hw->mac.type == ixgbe_mac_X540_vf ||
2821             hw->mac.type == ixgbe_mac_X550_vf ||
2822             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2823             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2824                 rxq->rdt_reg_addr =
2825                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2826                 rxq->rdh_reg_addr =
2827                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2828         } else {
2829                 rxq->rdt_reg_addr =
2830                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2831                 rxq->rdh_reg_addr =
2832                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2833         }
2834
2835         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2836         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2837
2838         /*
2839          * Certain constraints must be met in order to use the bulk buffer
2840          * allocation Rx burst function. If any Rx queue doesn't meet them,
2841          * the feature should be disabled for the whole port.
2842          */
2843         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2844                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2845                                     "preconditions - canceling the feature for "
2846                                     "the whole port[%d]",
2847                              rxq->queue_id, rxq->port_id);
2848                 adapter->rx_bulk_alloc_allowed = false;
2849         }
2850
2851         /*
2852          * Allocate software ring. Allow for space at the end of the
2853          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2854          * function does not access an invalid memory region.
2855          */
2856         len = nb_desc;
2857         if (adapter->rx_bulk_alloc_allowed)
2858                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2859
2860         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2861                                           sizeof(struct ixgbe_rx_entry) * len,
2862                                           RTE_CACHE_LINE_SIZE, socket_id);
2863         if (!rxq->sw_ring) {
2864                 ixgbe_rx_queue_release(rxq);
2865                 return -ENOMEM;
2866         }
2867
2868         /*
2869          * Always allocate even if it's not going to be needed in order to
2870          * simplify the code.
2871          *
2872          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2873          * be requested in ixgbe_dev_rx_init(), which is called later from
2874          * dev_start() flow.
2875          */
2876         rxq->sw_sc_ring =
2877                 rte_zmalloc_socket("rxq->sw_sc_ring",
2878                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2879                                    RTE_CACHE_LINE_SIZE, socket_id);
2880         if (!rxq->sw_sc_ring) {
2881                 ixgbe_rx_queue_release(rxq);
2882                 return -ENOMEM;
2883         }
2884
2885         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2886                             "dma_addr=0x%"PRIx64,
2887                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2888                      rxq->rx_ring_phys_addr);
2889
2890         if (!rte_is_power_of_2(nb_desc)) {
2891                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2892                                     "preconditions - canceling the feature for "
2893                                     "the whole port[%d]",
2894                              rxq->queue_id, rxq->port_id);
2895                 adapter->rx_vec_allowed = false;
2896         } else
2897                 ixgbe_rxq_vec_setup(rxq);
2898
2899         dev->data->rx_queues[queue_idx] = rxq;
2900
2901         ixgbe_reset_rx_queue(adapter, rxq);
2902
2903         return 0;
2904 }
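
/*
 * Illustrative application-side sketch (not part of this driver): a typical
 * Rx queue setup reaches the function above through the generic ethdev API.
 * "port_id" stands for an already configured port; passing a NULL rx_conf
 * selects the driver defaults reported by rte_eth_dev_info_get().
 *
 *     struct rte_mempool *mp = rte_pktmbuf_pool_create("rx_pool", 8192, 256,
 *                     0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
 *     int ret = rte_eth_rx_queue_setup(port_id, 0, 512,
 *                     rte_eth_dev_socket_id(port_id), NULL, mp);
 */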
2905
2906 uint32_t
2907 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2908 {
2909 #define IXGBE_RXQ_SCAN_INTERVAL 4
2910         volatile union ixgbe_adv_rx_desc *rxdp;
2911         struct ixgbe_rx_queue *rxq;
2912         uint32_t desc = 0;
2913
2914         rxq = dev->data->rx_queues[rx_queue_id];
2915         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2916
2917         while ((desc < rxq->nb_rx_desc) &&
2918                 (rxdp->wb.upper.status_error &
2919                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2920                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2921                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2922                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2923                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2924                                 desc - rxq->nb_rx_desc]);
2925         }
2926
2927         return desc;
2928 }
2929
2930 int
2931 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2932 {
2933         volatile union ixgbe_adv_rx_desc *rxdp;
2934         struct ixgbe_rx_queue *rxq = rx_queue;
2935         uint32_t desc;
2936
2937         if (unlikely(offset >= rxq->nb_rx_desc))
2938                 return 0;
2939         desc = rxq->rx_tail + offset;
2940         if (desc >= rxq->nb_rx_desc)
2941                 desc -= rxq->nb_rx_desc;
2942
2943         rxdp = &rxq->rx_ring[desc];
2944         return !!(rxdp->wb.upper.status_error &
2945                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2946 }
2947
2948 int
2949 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2950 {
2951         struct ixgbe_rx_queue *rxq = rx_queue;
2952         volatile uint32_t *status;
2953         uint32_t nb_hold, desc;
2954
2955         if (unlikely(offset >= rxq->nb_rx_desc))
2956                 return -EINVAL;
2957
2958 #ifdef RTE_IXGBE_INC_VECTOR
2959         if (rxq->rx_using_sse)
2960                 nb_hold = rxq->rxrearm_nb;
2961         else
2962 #endif
2963                 nb_hold = rxq->nb_rx_hold;
2964         if (offset >= rxq->nb_rx_desc - nb_hold)
2965                 return RTE_ETH_RX_DESC_UNAVAIL;
2966
2967         desc = rxq->rx_tail + offset;
2968         if (desc >= rxq->nb_rx_desc)
2969                 desc -= rxq->nb_rx_desc;
2970
2971         status = &rxq->rx_ring[desc].wb.upper.status_error;
2972         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
2973                 return RTE_ETH_RX_DESC_DONE;
2974
2975         return RTE_ETH_RX_DESC_AVAIL;
2976 }
2977
2978 int
2979 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
2980 {
2981         struct ixgbe_tx_queue *txq = tx_queue;
2982         volatile uint32_t *status;
2983         uint32_t desc;
2984
2985         if (unlikely(offset >= txq->nb_tx_desc))
2986                 return -EINVAL;
2987
2988         desc = txq->tx_tail + offset;
2989         /* go to next desc that has the RS bit */
2990         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
2991                 txq->tx_rs_thresh;
2992         if (desc >= txq->nb_tx_desc) {
2993                 desc -= txq->nb_tx_desc;
2994                 if (desc >= txq->nb_tx_desc)
2995                         desc -= txq->nb_tx_desc;
2996         }
2997
2998         status = &txq->tx_ring[desc].wb.status;
2999         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3000                 return RTE_ETH_TX_DESC_DONE;
3001
3002         return RTE_ETH_TX_DESC_FULL;
3003 }
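
/*
 * Illustrative application-side sketch (not part of this driver): the queue
 * count and descriptor status callbacks above are reached through the
 * generic ethdev helpers. "port_id" stands for a configured and started
 * port.
 *
 *     int used = rte_eth_rx_queue_count(port_id, 0);
 *     if (rte_eth_rx_descriptor_status(port_id, 0, 16) == RTE_ETH_RX_DESC_DONE)
 *             printf("at least 17 packets are waiting on Rx queue 0\n");
 *     if (rte_eth_tx_descriptor_status(port_id, 0, 64) == RTE_ETH_TX_DESC_FULL)
 *             printf("Tx descriptor 64 ahead of the tail is still in use\n");
 */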
3004
3005 void __attribute__((cold))
3006 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3007 {
3008         unsigned i;
3009         struct ixgbe_adapter *adapter =
3010                 (struct ixgbe_adapter *)dev->data->dev_private;
3011
3012         PMD_INIT_FUNC_TRACE();
3013
3014         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3015                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3016
3017                 if (txq != NULL) {
3018                         txq->ops->release_mbufs(txq);
3019                         txq->ops->reset(txq);
3020                 }
3021         }
3022
3023         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3024                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3025
3026                 if (rxq != NULL) {
3027                         ixgbe_rx_queue_release_mbufs(rxq);
3028                         ixgbe_reset_rx_queue(adapter, rxq);
3029                 }
3030         }
3031 }
3032
3033 void
3034 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3035 {
3036         unsigned i;
3037
3038         PMD_INIT_FUNC_TRACE();
3039
3040         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3041                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3042                 dev->data->rx_queues[i] = NULL;
3043         }
3044         dev->data->nb_rx_queues = 0;
3045
3046         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3047                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3048                 dev->data->tx_queues[i] = NULL;
3049         }
3050         dev->data->nb_tx_queues = 0;
3051 }
3052
3053 /*********************************************************************
3054  *
3055  *  Device RX/TX init functions
3056  *
3057  **********************************************************************/
3058
3059 /**
3060  * Receive Side Scaling (RSS)
3061  * See section 7.1.2.8 in the following document:
3062  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3063  *
3064  * Principles:
3065  * The source and destination IP addresses of the IP header and the source
3066  * and destination ports of TCP/UDP headers, if any, of received packets are
3067  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3068  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3069  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3070  * RSS output index which is used as the RX queue index where to store the
3071  * received packets.
3072  * The following output is supplied in the RX write-back descriptor:
3073  *     - 32-bit result of the Microsoft RSS hash function,
3074  *     - 4-bit RSS type field.
3075  */
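
/*
 * Minimal sketch of the lookup described above (illustrative, host-side
 * arithmetic only): the receive queue is the RETA entry selected by the
 * seven LSBs of the 32-bit hash, which the NIC also reports to software in
 * mbuf->hash.rss when PKT_RX_RSS_HASH is set. "reta" stands for a
 * hypothetical software mirror of the 128-entry table.
 *
 *     uint32_t hash = mb->hash.rss;        // from the Rx write-back descriptor
 *     uint8_t idx = hash & 0x7F;           // 7 LSBs -> index 0..127
 *     uint16_t rx_queue = reta[idx];       // queue chosen by the hardware
 */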
3076
3077 /*
3078  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3079  * Used as the default key.
3080  */
3081 static uint8_t rss_intel_key[40] = {
3082         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3083         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3084         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3085         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3086         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3087 };
3088
3089 static void
3090 ixgbe_rss_disable(struct rte_eth_dev *dev)
3091 {
3092         struct ixgbe_hw *hw;
3093         uint32_t mrqc;
3094         uint32_t mrqc_reg;
3095
3096         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3097         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3098         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3099         mrqc &= ~IXGBE_MRQC_RSSEN;
3100         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3101 }
3102
3103 static void
3104 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3105 {
3106         uint8_t  *hash_key;
3107         uint32_t mrqc;
3108         uint32_t rss_key;
3109         uint64_t rss_hf;
3110         uint16_t i;
3111         uint32_t mrqc_reg;
3112         uint32_t rssrk_reg;
3113
3114         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3115         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3116
3117         hash_key = rss_conf->rss_key;
3118         if (hash_key != NULL) {
3119                 /* Fill in RSS hash key */
3120                 for (i = 0; i < 10; i++) {
3121                         rss_key  = hash_key[(i * 4)];
3122                         rss_key |= hash_key[(i * 4) + 1] << 8;
3123                         rss_key |= hash_key[(i * 4) + 2] << 16;
3124                         rss_key |= hash_key[(i * 4) + 3] << 24;
3125                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3126                 }
3127         }
3128
3129         /* Set configured hashing protocols in MRQC register */
3130         rss_hf = rss_conf->rss_hf;
3131         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3132         if (rss_hf & ETH_RSS_IPV4)
3133                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3134         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3135                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3136         if (rss_hf & ETH_RSS_IPV6)
3137                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3138         if (rss_hf & ETH_RSS_IPV6_EX)
3139                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3140         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3141                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3142         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3143                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3144         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3145                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3146         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3147                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3148         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3149                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3150         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3151 }
3152
3153 int
3154 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3155                           struct rte_eth_rss_conf *rss_conf)
3156 {
3157         struct ixgbe_hw *hw;
3158         uint32_t mrqc;
3159         uint64_t rss_hf;
3160         uint32_t mrqc_reg;
3161
3162         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3163
3164         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3165                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3166                         "NIC.");
3167                 return -ENOTSUP;
3168         }
3169         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3170
3171         /*
3172          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3173          *     "RSS enabling cannot be done dynamically while it must be
3174          *      preceded by a software reset"
3175          * Before changing anything, first check that the update RSS operation
3176          * does not attempt to disable RSS, if RSS was enabled at
3177          * initialization time, or does not attempt to enable RSS, if RSS was
3178          * disabled at initialization time.
3179          */
3180         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3181         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3182         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3183                 if (rss_hf != 0) /* Enable RSS */
3184                         return -(EINVAL);
3185                 return 0; /* Nothing to do */
3186         }
3187         /* RSS enabled */
3188         if (rss_hf == 0) /* Disable RSS */
3189                 return -(EINVAL);
3190         ixgbe_hw_rss_hash_set(hw, rss_conf);
3191         return 0;
3192 }
3193
3194 int
3195 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3196                             struct rte_eth_rss_conf *rss_conf)
3197 {
3198         struct ixgbe_hw *hw;
3199         uint8_t *hash_key;
3200         uint32_t mrqc;
3201         uint32_t rss_key;
3202         uint64_t rss_hf;
3203         uint16_t i;
3204         uint32_t mrqc_reg;
3205         uint32_t rssrk_reg;
3206
3207         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3208         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3209         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3210         hash_key = rss_conf->rss_key;
3211         if (hash_key != NULL) {
3212                 /* Return RSS hash key */
3213                 for (i = 0; i < 10; i++) {
3214                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3215                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3216                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3217                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3218                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3219                 }
3220         }
3221
3222         /* Get RSS functions configured in MRQC register */
3223         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3224         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3225                 rss_conf->rss_hf = 0;
3226                 return 0;
3227         }
3228         rss_hf = 0;
3229         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3230                 rss_hf |= ETH_RSS_IPV4;
3231         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3232                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3233         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3234                 rss_hf |= ETH_RSS_IPV6;
3235         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3236                 rss_hf |= ETH_RSS_IPV6_EX;
3237         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3238                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3239         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3240                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3241         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3242                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3243         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3244                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3245         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3246                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3247         rss_conf->rss_hf = rss_hf;
3248         return 0;
3249 }
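
/*
 * Illustrative application-side sketch (not part of this driver): updating
 * and reading back the RSS hash configuration through the generic API. As
 * checked above, RSS must already have been enabled when the device was
 * configured; "port_id" stands for such a port and "key" for a 40-byte
 * array.
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = key,
 *             .rss_key_len = 40,
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     int ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *     if (ret == 0)
 *             ret = rte_eth_dev_rss_hash_conf_get(port_id, &conf);
 */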
3250
3251 static void
3252 ixgbe_rss_configure(struct rte_eth_dev *dev)
3253 {
3254         struct rte_eth_rss_conf rss_conf;
3255         struct ixgbe_hw *hw;
3256         uint32_t reta;
3257         uint16_t i;
3258         uint16_t j;
3259         uint16_t sp_reta_size;
3260         uint32_t reta_reg;
3261
3262         PMD_INIT_FUNC_TRACE();
3263         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3264
3265         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3266
3267         /*
3268          * Fill in redirection table
3269          * The byte-swap is needed because NIC registers are in
3270          * little-endian order.
3271          */
3272         reta = 0;
3273         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3274                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3275
3276                 if (j == dev->data->nb_rx_queues)
3277                         j = 0;
3278                 reta = (reta << 8) | j;
3279                 if ((i & 3) == 3)
3280                         IXGBE_WRITE_REG(hw, reta_reg,
3281                                         rte_bswap32(reta));
3282         }
3283
3284         /*
3285          * Configure the RSS key and the RSS protocols used to compute
3286          * the RSS hash of input packets.
3287          */
3288         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3289         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3290                 ixgbe_rss_disable(dev);
3291                 return;
3292         }
3293         if (rss_conf.rss_key == NULL)
3294                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3295         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3296 }
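
/*
 * Worked example (illustrative): with dev->data->nb_rx_queues = 4 and a
 * 128-entry table, the loop above programs the RETA round-robin as
 * 0, 1, 2, 3, 0, 1, 2, 3, ..., packing four 8-bit entries into each of the
 * 32 register writes.
 */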
3297
3298 #define NUM_VFTA_REGISTERS 128
3299 #define NIC_RX_BUFFER_SIZE 0x200
3300 #define X550_RX_BUFFER_SIZE 0x180
3301
3302 static void
3303 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3304 {
3305         struct rte_eth_vmdq_dcb_conf *cfg;
3306         struct ixgbe_hw *hw;
3307         enum rte_eth_nb_pools num_pools;
3308         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3309         uint16_t pbsize;
3310         uint8_t nb_tcs; /* number of traffic classes */
3311         int i;
3312
3313         PMD_INIT_FUNC_TRACE();
3314         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3315         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3316         num_pools = cfg->nb_queue_pools;
3317         /* Check we have a valid number of pools */
3318         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3319                 ixgbe_rss_disable(dev);
3320                 return;
3321         }
3322         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3323         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3324
3325         /*
3326          * RXPBSIZE
3327          * split the Rx packet buffer into sections, one per traffic class
3328          */
3329         switch (hw->mac.type) {
3330         case ixgbe_mac_X550:
3331         case ixgbe_mac_X550EM_x:
3332         case ixgbe_mac_X550EM_a:
3333                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3334                 break;
3335         default:
3336                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3337                 break;
3338         }
3339         for (i = 0; i < nb_tcs; i++) {
3340                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3341
3342                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3343                 /* clear 10 bits. */
3344                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3345                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3346         }
3347         /* zero alloc all unused TCs */
3348         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3349                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3350
3351                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3352                 /* clear 10 bits. */
3353                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3354         }
3355
3356         /* MRQC: enable vmdq and dcb */
3357         mrqc = (num_pools == ETH_16_POOLS) ?
3358                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3359         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3360
3361         /* PFVTCTL: turn on virtualisation and set the default pool */
3362         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3363         if (cfg->enable_default_pool) {
3364                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3365         } else {
3366                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3367         }
3368
3369         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3370
3371         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3372         queue_mapping = 0;
3373         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3374                 /*
3375                  * mapping is done with 3 bits per priority,
3376                  * so shift by i*3 each time
3377                  */
3378                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3379
3380         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3381
3382         /* RTRPCS: DCB related */
3383         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3384
3385         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3386         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3387         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3388         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3389
3390         /* VFTA - enable all vlan filters */
3391         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3392                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3393         }
3394
3395         /* VFRE: pool enabling for receive - 16 or 32 */
3396         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3397                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3398
3399         /*
3400          * MPSAR - allow pools to read specific mac addresses
3401          * In this case, all pools should be able to read from mac addr 0
3402          */
3403         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3404         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3405
3406         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3407         for (i = 0; i < cfg->nb_pool_maps; i++) {
3408                 /* set vlan id in VF register and set the valid bit */
3409                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3410                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3411                 /*
3412                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3413                  * pools, we only need to use the first half of the register
3414                  * i.e. bits 0-31
3415                  */
3416                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3417         }
3418 }
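
/*
 * Illustrative application-side sketch (not part of this driver): the
 * configuration consumed above is taken from rte_eth_conf before
 * rte_eth_dev_configure(). A minimal 16-pool / 8-TC layout mapping VLAN 100
 * to pool 0 could look like this ("port_conf" stands for the application's
 * struct rte_eth_conf, "i" for a loop counter):
 *
 *     struct rte_eth_vmdq_dcb_conf *c = &port_conf.rx_adv_conf.vmdq_dcb_conf;
 *
 *     port_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
 *     c->nb_queue_pools = ETH_16_POOLS;
 *     c->enable_default_pool = 0;
 *     c->nb_pool_maps = 1;
 *     c->pool_map[0].vlan_id = 100;
 *     c->pool_map[0].pools = 1 << 0;       // VLAN 100 -> pool 0 only
 *     for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
 *             c->dcb_tc[i] = i;            // user priority i -> TC i
 */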
3419
3420 /**
3421  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3422  * @dev: pointer to eth_dev structure
3423  * @dcb_config: pointer to ixgbe_dcb_config structure
3424  */
3425 static void
3426 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3427                        struct ixgbe_dcb_config *dcb_config)
3428 {
3429         uint32_t reg;
3430         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3431
3432         PMD_INIT_FUNC_TRACE();
3433         if (hw->mac.type != ixgbe_mac_82598EB) {
3434                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3435                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3436                 reg |= IXGBE_RTTDCS_ARBDIS;
3437                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3438
3439                 /* Enable DCB for Tx with 8 TCs */
3440                 if (dcb_config->num_tcs.pg_tcs == 8) {
3441                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3442                 } else {
3443                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3444                 }
3445                 if (dcb_config->vt_mode)
3446                         reg |= IXGBE_MTQC_VT_ENA;
3447                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3448
3449                 /* Enable the Tx desc arbiter */
3450                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3451                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3452                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3453
3454                 /* Enable Security TX Buffer IFG for DCB */
3455                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3456                 reg |= IXGBE_SECTX_DCB;
3457                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3458         }
3459 }
3460
3461 /**
3462  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3463  * @dev: pointer to rte_eth_dev structure
3464  * @dcb_config: pointer to ixgbe_dcb_config structure
3465  */
3466 static void
3467 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3468                         struct ixgbe_dcb_config *dcb_config)
3469 {
3470         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3471                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3472         struct ixgbe_hw *hw =
3473                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3474
3475         PMD_INIT_FUNC_TRACE();
3476         if (hw->mac.type != ixgbe_mac_82598EB)
3477                 /*PF VF Transmit Enable*/
3478                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3479                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3480
3481         /*Configure general DCB TX parameters*/
3482         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3483 }
3484
3485 static void
3486 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3487                         struct ixgbe_dcb_config *dcb_config)
3488 {
3489         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3490                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3491         struct ixgbe_dcb_tc_config *tc;
3492         uint8_t i, j;
3493
3494         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3495         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3496                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3497                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3498         } else {
3499                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3500                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3501         }
3502         /* User Priority to Traffic Class mapping */
3503         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3504                 j = vmdq_rx_conf->dcb_tc[i];
3505                 tc = &dcb_config->tc_config[j];
3506                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3507                                                 (uint8_t)(1 << j);
3508         }
3509 }
3510
3511 static void
3512 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3513                         struct ixgbe_dcb_config *dcb_config)
3514 {
3515         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3516                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3517         struct ixgbe_dcb_tc_config *tc;
3518         uint8_t i, j;
3519
3520         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3521         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3522                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3523                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3524         } else {
3525                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3526                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3527         }
3528
3529         /* User Priority to Traffic Class mapping */
3530         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3531                 j = vmdq_tx_conf->dcb_tc[i];
3532                 tc = &dcb_config->tc_config[j];
3533                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3534                                                 (uint8_t)(1 << j);
3535         }
3536 }
3537
3538 static void
3539 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3540                 struct ixgbe_dcb_config *dcb_config)
3541 {
3542         struct rte_eth_dcb_rx_conf *rx_conf =
3543                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3544         struct ixgbe_dcb_tc_config *tc;
3545         uint8_t i, j;
3546
3547         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3548         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3549
3550         /* User Priority to Traffic Class mapping */
3551         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3552                 j = rx_conf->dcb_tc[i];
3553                 tc = &dcb_config->tc_config[j];
3554                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3555                                                 (uint8_t)(1 << j);
3556         }
3557 }
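
/*
 * Worked example (illustrative): with rx_conf->nb_tcs = ETH_4_TCS and
 * dcb_tc = {0, 0, 1, 1, 2, 2, 3, 3}, user priorities 0-1 select TC0,
 * 2-3 select TC1, and so on; each selected TC j ends up with
 * up_to_tc_bitmap = (1 << j), e.g. 0x02 for TC1.
 */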
3558
3559 static void
3560 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3561                 struct ixgbe_dcb_config *dcb_config)
3562 {
3563         struct rte_eth_dcb_tx_conf *tx_conf =
3564                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3565         struct ixgbe_dcb_tc_config *tc;
3566         uint8_t i, j;
3567
3568         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3569         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3570
3571         /* User Priority to Traffic Class mapping */
3572         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3573                 j = tx_conf->dcb_tc[i];
3574                 tc = &dcb_config->tc_config[j];
3575                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3576                                                 (uint8_t)(1 << j);
3577         }
3578 }
3579
3580 /**
3581  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3582  * @dev: pointer to eth_dev structure
3583  * @dcb_config: pointer to ixgbe_dcb_config structure
3584  */
3585 static void
3586 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3587                        struct ixgbe_dcb_config *dcb_config)
3588 {
3589         uint32_t reg;
3590         uint32_t vlanctrl;
3591         uint8_t i;
3592         uint32_t q;
3593         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3594
3595         PMD_INIT_FUNC_TRACE();
3596         /*
3597          * Disable the arbiter before changing parameters
3598          * (always enable recycle mode; WSP)
3599          */
3600         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3601         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3602
3603         if (hw->mac.type != ixgbe_mac_82598EB) {
3604                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3605                 if (dcb_config->num_tcs.pg_tcs == 4) {
3606                         if (dcb_config->vt_mode)
3607                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3608                                         IXGBE_MRQC_VMDQRT4TCEN;
3609                         else {
3610                                 /* Whether the mode is DCB or DCB_RSS, just
3611                                  * set MRQE to RSSXTCEN; RSS itself is
3612                                  * controlled by the RSS_FIELD bits
3613                                  */
3614                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3615                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3616                                         IXGBE_MRQC_RTRSS4TCEN;
3617                         }
3618                 }
3619                 if (dcb_config->num_tcs.pg_tcs == 8) {
3620                         if (dcb_config->vt_mode)
3621                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3622                                         IXGBE_MRQC_VMDQRT8TCEN;
3623                         else {
3624                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3625                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3626                                         IXGBE_MRQC_RTRSS8TCEN;
3627                         }
3628                 }
3629
3630                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3631
3632                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3633                         /* Disable drop for all queues in VMDQ mode */
3634                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3635                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3636                                                 (IXGBE_QDE_WRITE |
3637                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3638                 } else {
3639                         /* Enable drop for all queues in SRIOV mode */
3640                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3641                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3642                                                 (IXGBE_QDE_WRITE |
3643                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3644                                                  IXGBE_QDE_ENABLE));
3645                 }
3646         }
3647
3648         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3649         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3650         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3651         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3652
3653         /* VFTA - enable all vlan filters */
3654         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3655                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3656         }
3657
3658         /*
3659          * Configure Rx packet plane (recycle mode; WSP) and
3660          * enable arbiter
3661          */
3662         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3663         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3664 }
3665
3666 static void
3667 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3668                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3669 {
3670         switch (hw->mac.type) {
3671         case ixgbe_mac_82598EB:
3672                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3673                 break;
3674         case ixgbe_mac_82599EB:
3675         case ixgbe_mac_X540:
3676         case ixgbe_mac_X550:
3677         case ixgbe_mac_X550EM_x:
3678         case ixgbe_mac_X550EM_a:
3679                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3680                                                   tsa, map);
3681                 break;
3682         default:
3683                 break;
3684         }
3685 }
3686
3687 static void
3688 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3689                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3690 {
3691         switch (hw->mac.type) {
3692         case ixgbe_mac_82598EB:
3693                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3694                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3695                 break;
3696         case ixgbe_mac_82599EB:
3697         case ixgbe_mac_X540:
3698         case ixgbe_mac_X550:
3699         case ixgbe_mac_X550EM_x:
3700         case ixgbe_mac_X550EM_a:
3701                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3702                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3703                 break;
3704         default:
3705                 break;
3706         }
3707 }
3708
3709 #define DCB_RX_CONFIG  1
3710 #define DCB_TX_CONFIG  1
3711 #define DCB_TX_PB      1024
3712 /**
3713  * ixgbe_dcb_hw_configure - Enable DCB and configure
3714  * general DCB in VT mode and non-VT mode parameters
3715  * @dev: pointer to rte_eth_dev structure
3716  * @dcb_config: pointer to ixgbe_dcb_config structure
3717  */
3718 static int
3719 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3720                         struct ixgbe_dcb_config *dcb_config)
3721 {
3722         int     ret = 0;
3723         uint8_t i, pfc_en, nb_tcs;
3724         uint16_t pbsize, rx_buffer_size;
3725         uint8_t config_dcb_rx = 0;
3726         uint8_t config_dcb_tx = 0;
3727         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3728         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3729         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3730         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3731         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3732         struct ixgbe_dcb_tc_config *tc;
3733         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3734         struct ixgbe_hw *hw =
3735                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3736
3737         switch (dev->data->dev_conf.rxmode.mq_mode) {
3738         case ETH_MQ_RX_VMDQ_DCB:
3739                 dcb_config->vt_mode = true;
3740                 if (hw->mac.type != ixgbe_mac_82598EB) {
3741                         config_dcb_rx = DCB_RX_CONFIG;
3742                         /*
3743                          * get DCB and VT RX configuration parameters
3744                          * from rte_eth_conf
3745                          */
3746                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3747                         /* Configure general VMDQ and DCB RX parameters */
3748                         ixgbe_vmdq_dcb_configure(dev);
3749                 }
3750                 break;
3751         case ETH_MQ_RX_DCB:
3752         case ETH_MQ_RX_DCB_RSS:
3753                 dcb_config->vt_mode = false;
3754                 config_dcb_rx = DCB_RX_CONFIG;
3755                 /* Get DCB RX configuration parameters from rte_eth_conf */
3756                 ixgbe_dcb_rx_config(dev, dcb_config);
3757                 /* Configure general DCB RX parameters */
3758                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3759                 break;
3760         default:
3761                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3762                 break;
3763         }
3764         switch (dev->data->dev_conf.txmode.mq_mode) {
3765         case ETH_MQ_TX_VMDQ_DCB:
3766                 dcb_config->vt_mode = true;
3767                 config_dcb_tx = DCB_TX_CONFIG;
3768                 /* get DCB and VT TX configuration parameters
3769                  * from rte_eth_conf
3770                  */
3771                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3772                 /* Configure general VMDQ and DCB TX parameters */
3773                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3774                 break;
3775
3776         case ETH_MQ_TX_DCB:
3777                 dcb_config->vt_mode = false;
3778                 config_dcb_tx = DCB_TX_CONFIG;
3779                 /* Get DCB TX configuration parameters from rte_eth_conf */
3780                 ixgbe_dcb_tx_config(dev, dcb_config);
3781                 /* Configure general DCB TX parameters */
3782                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3783                 break;
3784         default:
3785                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3786                 break;
3787         }
3788
3789         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3790         /* Unpack map */
3791         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3792         if (nb_tcs == ETH_4_TCS) {
3793                 /* Avoid un-configured priority mapping to TC0 */
3794                 uint8_t j = 4;
3795                 uint8_t mask = 0xFF;
3796
3797                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3798                         mask = (uint8_t)(mask & (~(1 << map[i])));
3799                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3800                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3801                                 map[j++] = i;
3802                         mask >>= 1;
3803                 }
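                /*
                 * Example: with priorities 0-3 mapped to TCs 0-3, mask is left
                 * as 0xF0 and the loop above assigns priorities 4-7 to TCs 4-7,
                 * so no unconfigured priority silently falls back to TC0.
                 */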
3804                 /* Re-configure 4 TCs BW */
3805                 for (i = 0; i < nb_tcs; i++) {
3806                         tc = &dcb_config->tc_config[i];
3807                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3808                                                 (uint8_t)(100 / nb_tcs);
3809                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3810                                                 (uint8_t)(100 / nb_tcs);
3811                 }
3812                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3813                         tc = &dcb_config->tc_config[i];
3814                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3815                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3816                 }
3817         }
3818
3819         switch (hw->mac.type) {
3820         case ixgbe_mac_X550:
3821         case ixgbe_mac_X550EM_x:
3822         case ixgbe_mac_X550EM_a:
3823                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3824                 break;
3825         default:
3826                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3827                 break;
3828         }
3829
3830         if (config_dcb_rx) {
3831                 /* Set RX buffer size */
3832                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3833                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3834
3835                 for (i = 0; i < nb_tcs; i++) {
3836                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3837                 }
3838                 /* zero alloc all unused TCs */
3839                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3840                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3841                 }
3842         }
3843         if (config_dcb_tx) {
3844                 /* Only an equally distributed Tx packet
3845                  * buffer strategy is supported.
3846                  */
3847                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3848                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3849
3850                 for (i = 0; i < nb_tcs; i++) {
3851                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3852                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3853                 }
3854                 /* Clear unused TCs, if any, to zero buffer size */
3855                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3856                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3857                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3858                 }
3859         }
3860
3861         /* Calculate traffic class credits */
3862         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3863                                 IXGBE_DCB_TX_CONFIG);
3864         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3865                                 IXGBE_DCB_RX_CONFIG);
3866
3867         if (config_dcb_rx) {
3868                 /* Unpack CEE standard containers */
3869                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3870                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3871                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3872                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3873                 /* Configure PG(ETS) RX */
3874                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3875         }
3876
3877         if (config_dcb_tx) {
3878                 /* Unpack CEE standard containers */
3879                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3880                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3881                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3882                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3883                 /* Configure PG(ETS) TX */
3884                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3885         }
3886
3887         /* Configure queue statistics registers */
3888         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3889
3890         /* Check if the PFC is supported */
3891         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3892                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3893                 for (i = 0; i < nb_tcs; i++) {
3894                         /*
3895                          * E.g. if the TC count is 8, the default high_water is 48
3896                          * and the low_water is 16 (3/4 and 1/4 of the per-TC buffer).
3897                          */
3898                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3899                         hw->fc.low_water[i] = pbsize / 4;
3900                         /* Enable pfc for this TC */
3901                         tc = &dcb_config->tc_config[i];
3902                         tc->pfc = ixgbe_dcb_pfc_enabled;
3903                 }
3904                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3905                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3906                         pfc_en &= 0x0F;
3907                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3908         }
3909
3910         return ret;
3911 }
3912
3913 /**
3914  * ixgbe_configure_dcb - Configure DCB hardware
3915  * @dev: pointer to rte_eth_dev
3916  */
3917 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3918 {
3919         struct ixgbe_dcb_config *dcb_cfg =
3920                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3921         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3922
3923         PMD_INIT_FUNC_TRACE();
3924
3925         /* check support mq_mode for DCB */
3926         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3927             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3928             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3929                 return;
3930
3931         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3932                 return;
3933
3934         /* Configure DCB hardware */
3935         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3936 }
3937
3938 /*
3939  * VMDq is only supported on 10 GbE NICs.
3940  */
3941 static void
3942 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3943 {
3944         struct rte_eth_vmdq_rx_conf *cfg;
3945         struct ixgbe_hw *hw;
3946         enum rte_eth_nb_pools num_pools;
3947         uint32_t mrqc, vt_ctl, vlanctrl;
3948         uint32_t vmolr = 0;
3949         int i;
3950
3951         PMD_INIT_FUNC_TRACE();
3952         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3953         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3954         num_pools = cfg->nb_queue_pools;
3955
3956         ixgbe_rss_disable(dev);
3957
3958         /* MRQC: enable vmdq */
3959         mrqc = IXGBE_MRQC_VMDQEN;
3960         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3961
3962         /* PFVTCTL: turn on virtualisation and set the default pool */
3963         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3964         if (cfg->enable_default_pool)
3965                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3966         else
3967                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3968
3969         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3970
3971         for (i = 0; i < (int)num_pools; i++) {
3972                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3973                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3974         }
3975
3976         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3977         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3978         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3979         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3980
3981         /* VFTA - enable all vlan filters */
3982         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3983                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3984
3985         /* VFRE: pool enabling for receive - 64 */
3986         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3987         if (num_pools == ETH_64_POOLS)
3988                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3989
3990         /*
3991          * MPSAR - allow pools to read specific mac addresses
3992          * In this case, all pools should be able to read from mac addr 0
3993          */
3994         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
3995         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
3996
3997         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3998         for (i = 0; i < cfg->nb_pool_maps; i++) {
3999                 /* set vlan id in VF register and set the valid bit */
4000                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4001                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4002                 /*
4003                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4004                  * pools, we only need to use the first half of the register
4005                  * i.e. bits 0-31
4006                  */
4007                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4008                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4009                                         (cfg->pool_map[i].pools & UINT32_MAX));
4010                 else
4011                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4012                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4013
4014         }
4015
4016         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4017         if (cfg->enable_loop_back) {
4018                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4019                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4020                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4021         }
4022
4023         IXGBE_WRITE_FLUSH(hw);
4024 }
4025
4026 /*
4027  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4028  * @hw: pointer to hardware structure
4029  */
4030 static void
4031 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4032 {
4033         uint32_t reg;
4034         uint32_t q;
4035
4036         PMD_INIT_FUNC_TRACE();
4037         /* PF VF Transmit Enable */
4038         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4039         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4040
4041         /* Disable the Tx desc arbiter so that MTQC can be changed */
4042         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4043         reg |= IXGBE_RTTDCS_ARBDIS;
4044         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4045
4046         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4047         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4048
4049         /* Disable drop for all queues */
4050         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4051                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4052                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4053
4054         /* Enable the Tx desc arbiter */
4055         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4056         reg &= ~IXGBE_RTTDCS_ARBDIS;
4057         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4058
4059         IXGBE_WRITE_FLUSH(hw);
4060 }
4061
4062 static int __attribute__((cold))
4063 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4064 {
4065         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4066         uint64_t dma_addr;
4067         unsigned int i;
4068
4069         /* Initialize software ring entries */
4070         for (i = 0; i < rxq->nb_rx_desc; i++) {
4071                 volatile union ixgbe_adv_rx_desc *rxd;
4072                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4073
4074                 if (mbuf == NULL) {
4075                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4076                                      (unsigned) rxq->queue_id);
4077                         return -ENOMEM;
4078                 }
4079
4080                 rte_mbuf_refcnt_set(mbuf, 1);
4081                 mbuf->next = NULL;
4082                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4083                 mbuf->nb_segs = 1;
4084                 mbuf->port = rxq->port_id;
4085
4086                 dma_addr =
4087                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4088                 rxd = &rxq->rx_ring[i];
4089                 rxd->read.hdr_addr = 0;
4090                 rxd->read.pkt_addr = dma_addr;
4091                 rxe[i].mbuf = mbuf;
4092         }
4093
4094         return 0;
4095 }
4096
4097 static int
4098 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4099 {
4100         struct ixgbe_hw *hw;
4101         uint32_t mrqc;
4102
4103         ixgbe_rss_configure(dev);
4104
4105         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4106
4107         /* MRQC: enable VF RSS */
4108         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4109         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4110         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4111         case ETH_64_POOLS:
4112                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4113                 break;
4114
4115         case ETH_32_POOLS:
4116                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4117                 break;
4118
4119         default:
4120                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4121                 return -EINVAL;
4122         }
4123
4124         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4125
4126         return 0;
4127 }
4128
4129 static int
4130 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4131 {
4132         struct ixgbe_hw *hw =
4133                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4134
4135         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4136         case ETH_64_POOLS:
4137                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4138                         IXGBE_MRQC_VMDQEN);
4139                 break;
4140
4141         case ETH_32_POOLS:
4142                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4143                         IXGBE_MRQC_VMDQRT4TCEN);
4144                 break;
4145
4146         case ETH_16_POOLS:
4147                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4148                         IXGBE_MRQC_VMDQRT8TCEN);
4149                 break;
4150         default:
4151                 PMD_INIT_LOG(ERR,
4152                         "invalid pool number in IOV mode");
4153                 break;
4154         }
4155         return 0;
4156 }
4157
4158 static int
4159 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4160 {
4161         struct ixgbe_hw *hw =
4162                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4163
4164         if (hw->mac.type == ixgbe_mac_82598EB)
4165                 return 0;
4166
4167         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4168                 /*
4169                  * SRIOV inactive scheme
4170                  * any DCB/RSS w/o VMDq multi-queue setting
4171                  */
4172                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4173                 case ETH_MQ_RX_RSS:
4174                 case ETH_MQ_RX_DCB_RSS:
4175                 case ETH_MQ_RX_VMDQ_RSS:
4176                         ixgbe_rss_configure(dev);
4177                         break;
4178
4179                 case ETH_MQ_RX_VMDQ_DCB:
4180                         ixgbe_vmdq_dcb_configure(dev);
4181                         break;
4182
4183                 case ETH_MQ_RX_VMDQ_ONLY:
4184                         ixgbe_vmdq_rx_hw_configure(dev);
4185                         break;
4186
4187                 case ETH_MQ_RX_NONE:
4188                 default:
4189                         /* If mq_mode is none, disable RSS mode. */
4190                         ixgbe_rss_disable(dev);
4191                         break;
4192                 }
4193         } else {
4194                 /*
4195                  * SRIOV active scheme
4196                  * Support RSS together with VMDq & SRIOV
4197                  */
4198                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4199                 case ETH_MQ_RX_RSS:
4200                 case ETH_MQ_RX_VMDQ_RSS:
4201                         ixgbe_config_vf_rss(dev);
4202                         break;
4203                 case ETH_MQ_RX_VMDQ_DCB:
4204                         ixgbe_vmdq_dcb_configure(dev);
4205                         break;
4206                 /* FIXME if support DCB/RSS together with VMDq & SRIOV */
4207                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4208                         PMD_INIT_LOG(ERR,
4209                                 "Could not support DCB/RSS with VMDq & SRIOV");
4210                         return -1;
4211                 default:
4212                         ixgbe_config_vf_default(dev);
4213                         break;
4214                 }
4215         }
4216
4217         return 0;
4218 }
4219
4220 static int
4221 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4222 {
4223         struct ixgbe_hw *hw =
4224                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4225         uint32_t mtqc;
4226         uint32_t rttdcs;
4227
4228         if (hw->mac.type == ixgbe_mac_82598EB)
4229                 return 0;
4230
4231         /* disable arbiter before setting MTQC */
4232         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4233         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4234         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4235
4236         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4237                 /*
4238                  * SRIOV inactive scheme
4239                  * any DCB w/o VMDq multi-queue setting
4240                  */
4241                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4242                         ixgbe_vmdq_tx_hw_configure(hw);
4243                 else {
4244                         mtqc = IXGBE_MTQC_64Q_1PB;
4245                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4246                 }
4247         } else {
4248                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4249
4250                 /*
4251                  * SRIOV active scheme
4252                  * FIXME if support DCB together with VMDq & SRIOV
4253                  */
4254                 case ETH_64_POOLS:
4255                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4256                         break;
4257                 case ETH_32_POOLS:
4258                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4259                         break;
4260                 case ETH_16_POOLS:
4261                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4262                                 IXGBE_MTQC_8TC_8TQ;
4263                         break;
4264                 default:
4265                         mtqc = IXGBE_MTQC_64Q_1PB;
4266                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4267                 }
4268                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4269         }
4270
4271         /* re-enable arbiter */
4272         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4273         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4274
4275         return 0;
4276 }
4277
4278 /**
4279  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4280  *
4281  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4282  * spec rev. 3.0 chapter 8.2.3.8.13.
4283  *
4284  * @pool Memory pool of the Rx queue
4285  */
4286 static inline uint32_t
4287 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4288 {
4289         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4290
4291         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4292         uint16_t maxdesc =
4293                 IPV4_MAX_PKT_LEN /
4294                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4295
4296         if (maxdesc >= 16)
4297                 return IXGBE_RSCCTL_MAXDESC_16;
4298         else if (maxdesc >= 8)
4299                 return IXGBE_RSCCTL_MAXDESC_8;
4300         else if (maxdesc >= 4)
4301                 return IXGBE_RSCCTL_MAXDESC_4;
4302         else
4303                 return IXGBE_RSCCTL_MAXDESC_1;
4304 }
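/*
 * Worked example (assuming the common 2176-byte mbuf data room and the
 * default 128-byte headroom, i.e. 2048 usable bytes per buffer):
 * 65535 / 2048 = 31, so IXGBE_RSCCTL_MAXDESC_16 is returned and
 * 16 * 2048 bytes stays safely below the 64 KB minus one limit.
 */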
4305
4306 /**
4307  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4308  * interrupt
4309  *
4310  * (Taken from FreeBSD tree)
4311  * (yes this is all very magic and confusing :)
4312  *
4313  * @dev port handle
4314  * @entry the register array entry
4315  * @vector the MSIX vector for this queue
4316  * @type RX/TX/MISC
4317  */
4318 static void
4319 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4320 {
4321         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4322         u32 ivar, index;
4323
4324         vector |= IXGBE_IVAR_ALLOC_VAL;
4325
4326         switch (hw->mac.type) {
4327
4328         case ixgbe_mac_82598EB:
4329                 if (type == -1)
4330                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4331                 else
4332                         entry += (type * 64);
4333                 index = (entry >> 2) & 0x1F;
4334                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4335                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4336                 ivar |= (vector << (8 * (entry & 0x3)));
4337                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4338                 break;
4339
4340         case ixgbe_mac_82599EB:
4341         case ixgbe_mac_X540:
4342                 if (type == -1) { /* MISC IVAR */
4343                         index = (entry & 1) * 8;
4344                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4345                         ivar &= ~(0xFF << index);
4346                         ivar |= (vector << index);
4347                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4348                 } else {        /* RX/TX IVARS */
4349                         index = (16 * (entry & 1)) + (8 * type);
4350                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4351                         ivar &= ~(0xFF << index);
4352                         ivar |= (vector << index);
4353                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4354                 }
4355
4356                 break;
4357
4358         default:
4359                 break;
4360         }
4361 }
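/*
 * On 82599/X540 each IVAR register holds four 8-bit entries (the Rx and Tx
 * causes of two queues) and bit 7 (IXGBE_IVAR_ALLOC_VAL) marks an entry as
 * valid. For example, ixgbe_set_ivar(dev, rxq->reg_idx, i, 0) in
 * ixgbe_set_rsc() below binds Rx queue reg_idx to MSI-X vector i.
 */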
4362
4363 void __attribute__((cold))
4364 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4365 {
4366         uint16_t i, rx_using_sse;
4367         struct ixgbe_adapter *adapter =
4368                 (struct ixgbe_adapter *)dev->data->dev_private;
4369
4370         /*
4371          * In order to allow Vector Rx there are a few configuration
4372          * conditions to be met and Rx Bulk Allocation should be allowed.
4373          */
4374         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4375             !adapter->rx_bulk_alloc_allowed) {
4376                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4377                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4378                                     "not enabled",
4379                              dev->data->port_id);
4380
4381                 adapter->rx_vec_allowed = false;
4382         }
4383
4384         /*
4385          * Initialize the appropriate LRO callback.
4386          *
4387          * If all queues satisfy the bulk allocation preconditions
4388          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4389          * Otherwise use a single allocation version.
4390          */
4391         if (dev->data->lro) {
4392                 if (adapter->rx_bulk_alloc_allowed) {
4393                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4394                                            "allocation version");
4395                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4396                 } else {
4397                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4398                                            "allocation version");
4399                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4400                 }
4401         } else if (dev->data->scattered_rx) {
4402                 /*
4403                  * Set the non-LRO scattered callback: there are Vector and
4404                  * single allocation versions.
4405                  */
4406                 if (adapter->rx_vec_allowed) {
4407                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4408                                             "callback (port=%d).",
4409                                      dev->data->port_id);
4410
4411                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4412                 } else if (adapter->rx_bulk_alloc_allowed) {
4413                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4414                                            "allocation callback (port=%d).",
4415                                      dev->data->port_id);
4416                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4417                 } else {
4418                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4419                                             "single allocation) "
4420                                             "Scattered Rx callback "
4421                                             "(port=%d).",
4422                                      dev->data->port_id);
4423
4424                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4425                 }
4426         /*
4427          * Below we set "simple" callbacks according to port/queues parameters.
4428          * If parameters allow we are going to choose between the following
4429          * callbacks:
4430          *    - Vector
4431          *    - Bulk Allocation
4432          *    - Single buffer allocation (the simplest one)
4433          */
4434         } else if (adapter->rx_vec_allowed) {
4435                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4436                                     "burst size no less than %d (port=%d).",
4437                              RTE_IXGBE_DESCS_PER_LOOP,
4438                              dev->data->port_id);
4439
4440                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4441         } else if (adapter->rx_bulk_alloc_allowed) {
4442                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4443                                     "satisfied. Rx Burst Bulk Alloc function "
4444                                     "will be used on port=%d.",
4445                              dev->data->port_id);
4446
4447                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4448         } else {
4449                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4450                                     "satisfied, or Scattered Rx is requested "
4451                                     "(port=%d).",
4452                              dev->data->port_id);
4453
4454                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4455         }
4456
4457         /* Propagate information about RX function choice through all queues. */
4458
4459         rx_using_sse =
4460                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4461                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4462
4463         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4464                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4465
4466                 rxq->rx_using_sse = rx_using_sse;
4467         }
4468 }
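/*
 * Summary of the selection above: the LRO callbacks take precedence, then
 * the scattered Rx callbacks (vector, bulk or single allocation), and
 * finally the simple vector, bulk-allocation and single-buffer
 * (ixgbe_recv_pkts) callbacks.
 */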
4469
4470 /**
4471  * ixgbe_set_rsc - configure RSC related port HW registers
4472  *
4473  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4474  * of 82599 Spec (x540 configuration is virtually the same).
4475  *
4476  * @dev port handle
4477  *
4478  * Returns 0 in case of success or a non-zero error code
4479  */
4480 static int
4481 ixgbe_set_rsc(struct rte_eth_dev *dev)
4482 {
4483         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4484         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4485         struct rte_eth_dev_info dev_info = { 0 };
4486         bool rsc_capable = false;
4487         uint16_t i;
4488         uint32_t rdrxctl;
4489
4490         /* Sanity check */
4491         dev->dev_ops->dev_infos_get(dev, &dev_info);
4492         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4493                 rsc_capable = true;
4494
4495         if (!rsc_capable && rx_conf->enable_lro) {
4496                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4497                                    "support it");
4498                 return -EINVAL;
4499         }
4500
4501         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4502
4503         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4504                 /*
4505                  * According to chapter 4.6.7.2.1 of the Spec Rev. 3.0,
4506                  * RSC configuration requires HW CRC stripping to be
4507                  * enabled. If the user requested both HW CRC stripping off
4508                  * and RSC on - return an error.
4509                  */
4510                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4511                                     "stripping is disabled");
4512                 return -EINVAL;
4513         }
4514
4515         /* RFCTL configuration  */
4516         if (rsc_capable) {
4517                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4518
4519                 if (rx_conf->enable_lro)
4520                         /*
4521                          * Since NFS packets coalescing is not supported - clear
4522                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4523                          * enabled.
4524                          */
4525                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4526                                    IXGBE_RFCTL_NFSR_DIS);
4527                 else
4528                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4529
4530                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4531         }
4532
4533         /* If LRO hasn't been requested - we are done here. */
4534         if (!rx_conf->enable_lro)
4535                 return 0;
4536
4537         /* Set RDRXCTL.RSCACKC bit */
4538         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4539         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4540         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4541
4542         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4543         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4544                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4545                 uint32_t srrctl =
4546                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4547                 uint32_t rscctl =
4548                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4549                 uint32_t psrtype =
4550                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4551                 uint32_t eitr =
4552                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4553
4554                 /*
4555                  * ixgbe PMD doesn't support header-split at the moment.
4556                  *
4557                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4558                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4559                  * should be configured even if header split is not
4560                  * enabled. We will configure it to 128 bytes following the
4561                  * recommendation in the spec.
4562                  */
4563                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4564                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4565                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4566
4567                 /*
4568                  * TODO: Consider setting the Receive Descriptor Minimum
4569                  * Threshold Size for an RSC case. This is not an obviously
4570                  * beneficial option, but one worth considering...
4571                  */
4572
4573                 rscctl |= IXGBE_RSCCTL_RSCEN;
4574                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4575                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4576
4577                 /*
4578                  * RSC: Set ITR interval corresponding to 2K ints/s.
4579                  *
4580                  * Full-sized RSC aggregations for a 10Gb/s link will
4581                  * arrive at a rate of about 20K aggregations/s.
4582                  *
4583                  * A 2K ints/s rate will cause only 10% of the
4584                  * aggregations to be closed due to interrupt timer
4585                  * expiration when streaming at wire speed.
4586                  *
4587                  * For a sparse streaming case this setting will yield
4588                  * at most 500us latency for a single RSC aggregation.
4589                  */
4590                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4591                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
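                /* 500 us between interrupts is roughly 2000 ints/s, i.e. the
                 * 2K ints/s target described above.
                 */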
4592
4593                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4594                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4595                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4596                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4597
4598                 /*
4599                  * RSC requires the mapping of the queue to the
4600                  * interrupt vector.
4601                  */
4602                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4603         }
4604
4605         dev->data->lro = 1;
4606
4607         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4608
4609         return 0;
4610 }
4611
4612 /*
4613  * Initializes Receive Unit.
4614  */
4615 int __attribute__((cold))
4616 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4617 {
4618         struct ixgbe_hw     *hw;
4619         struct ixgbe_rx_queue *rxq;
4620         uint64_t bus_addr;
4621         uint32_t rxctrl;
4622         uint32_t fctrl;
4623         uint32_t hlreg0;
4624         uint32_t maxfrs;
4625         uint32_t srrctl;
4626         uint32_t rdrxctl;
4627         uint32_t rxcsum;
4628         uint16_t buf_size;
4629         uint16_t i;
4630         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4631         int rc;
4632
4633         PMD_INIT_FUNC_TRACE();
4634         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4635
4636         /*
4637          * Make sure receives are disabled while setting
4638          * up the RX context (registers, descriptor rings, etc.).
4639          */
4640         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4641         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4642
4643         /* Enable receipt of broadcast frames */
4644         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4645         fctrl |= IXGBE_FCTRL_BAM;
4646         fctrl |= IXGBE_FCTRL_DPF;
4647         fctrl |= IXGBE_FCTRL_PMCF;
4648         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4649
4650         /*
4651          * Configure CRC stripping, if any.
4652          */
4653         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4654         if (rx_conf->hw_strip_crc)
4655                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4656         else
4657                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4658
4659         /*
4660          * Configure jumbo frame support, if any.
4661          */
4662         if (rx_conf->jumbo_frame == 1) {
4663                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4664                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4665                 maxfrs &= 0x0000FFFF;
4666                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4667                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
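                /* MAXFRS.MFS occupies the upper 16 bits, hence the shift by 16. */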
4668         } else
4669                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4670
4671         /*
4672          * If loopback mode is configured for 82599, set LPBK bit.
4673          */
4674         if (hw->mac.type == ixgbe_mac_82599EB &&
4675                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4676                 hlreg0 |= IXGBE_HLREG0_LPBK;
4677         else
4678                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4679
4680         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4681
4682         /* Setup RX queues */
4683         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4684                 rxq = dev->data->rx_queues[i];
4685
4686                 /*
4687                  * Reset crc_len in case it was changed after queue setup by a
4688                  * call to configure.
4689                  */
4690                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4691
4692                 /* Setup the Base and Length of the Rx Descriptor Rings */
4693                 bus_addr = rxq->rx_ring_phys_addr;
4694                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4695                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4696                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4697                                 (uint32_t)(bus_addr >> 32));
4698                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4699                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4700                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4701                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4702
4703                 /* Configure the SRRCTL register */
4704 #ifdef RTE_HEADER_SPLIT_ENABLE
4705                 /*
4706                  * Configure Header Split
4707                  */
4708                 if (rx_conf->header_split) {
4709                         if (hw->mac.type == ixgbe_mac_82599EB) {
4710                                 /* Must setup the PSRTYPE register */
4711                                 uint32_t psrtype;
4712
4713                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4714                                         IXGBE_PSRTYPE_UDPHDR   |
4715                                         IXGBE_PSRTYPE_IPV4HDR  |
4716                                         IXGBE_PSRTYPE_IPV6HDR;
4717                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4718                         }
4719                         srrctl = ((rx_conf->split_hdr_size <<
4720                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4721                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4722                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4723                 } else
4724 #endif
4725                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4726
4727                 /* Set if packets are dropped when no descriptors available */
4728                 if (rxq->drop_en)
4729                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4730
4731                 /*
4732                  * Configure the RX buffer size in the BSIZEPACKET field of
4733                  * the SRRCTL register of the queue.
4734                  * The value is in 1 KB resolution. Valid values can be from
4735                  * 1 KB to 16 KB.
4736                  */
4737                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4738                         RTE_PKTMBUF_HEADROOM);
4739                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4740                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4741
4742                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4743
4744                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4745                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
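                /*
                 * BSIZEPKT is programmed in 1 KB units (a shift of 10), so a
                 * 2048-byte buffer yields a field value of 2; the read-back
                 * above recovers the effective buffer size used for the
                 * scattered Rx decision below.
                 */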
4746
4747                 /* Add dual VLAN tag length to support dual VLAN (QinQ) frames */
4748                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4749                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4750                         dev->data->scattered_rx = 1;
4751         }
4752
4753         if (rx_conf->enable_scatter)
4754                 dev->data->scattered_rx = 1;
4755
4756         /*
4757          * Device configured with multiple RX queues.
4758          */
4759         ixgbe_dev_mq_rx_configure(dev);
4760
4761         /*
4762          * Setup the Checksum Register.
4763          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4764          * Enable IP/L4 checksum computation by hardware if requested to do so.
4765          */
4766         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4767         rxcsum |= IXGBE_RXCSUM_PCSD;
4768         if (rx_conf->hw_ip_checksum)
4769                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4770         else
4771                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4772
4773         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4774
4775         if (hw->mac.type == ixgbe_mac_82599EB ||
4776             hw->mac.type == ixgbe_mac_X540) {
4777                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4778                 if (rx_conf->hw_strip_crc)
4779                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4780                 else
4781                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4782                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4783                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4784         }
4785
4786         rc = ixgbe_set_rsc(dev);
4787         if (rc)
4788                 return rc;
4789
4790         ixgbe_set_rx_function(dev);
4791
4792         return 0;
4793 }
4794
4795 /*
4796  * Initializes Transmit Unit.
4797  */
4798 void __attribute__((cold))
4799 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4800 {
4801         struct ixgbe_hw     *hw;
4802         struct ixgbe_tx_queue *txq;
4803         uint64_t bus_addr;
4804         uint32_t hlreg0;
4805         uint32_t txctrl;
4806         uint16_t i;
4807
4808         PMD_INIT_FUNC_TRACE();
4809         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4810
4811         /* Enable TX CRC (checksum offload requirement) and hw padding
4812          * (TSO requirement)
4813          */
4814         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4815         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4816         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4817
4818         /* Setup the Base and Length of the Tx Descriptor Rings */
4819         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4820                 txq = dev->data->tx_queues[i];
4821
4822                 bus_addr = txq->tx_ring_phys_addr;
4823                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4824                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4825                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4826                                 (uint32_t)(bus_addr >> 32));
4827                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4828                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4829                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4830                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4831                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4832
4833                 /*
4834                  * Disable Tx Head Writeback RO bit, since this hoses
4835                  * bookkeeping if things aren't delivered in order.
4836                  */
4837                 switch (hw->mac.type) {
4838                 case ixgbe_mac_82598EB:
4839                         txctrl = IXGBE_READ_REG(hw,
4840                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4841                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4842                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4843                                         txctrl);
4844                         break;
4845
4846                 case ixgbe_mac_82599EB:
4847                 case ixgbe_mac_X540:
4848                 case ixgbe_mac_X550:
4849                 case ixgbe_mac_X550EM_x:
4850                 case ixgbe_mac_X550EM_a:
4851                 default:
4852                         txctrl = IXGBE_READ_REG(hw,
4853                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4854                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4855                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4856                                         txctrl);
4857                         break;
4858                 }
4859         }
4860
4861         /* Device configured with multiple TX queues. */
4862         ixgbe_dev_mq_tx_configure(dev);
4863 }
4864
4865 /*
4866  * Set up link for 82599 loopback mode Tx->Rx.
4867  */
4868 static inline void __attribute__((cold))
4869 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4870 {
4871         PMD_INIT_FUNC_TRACE();
4872
4873         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4874                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4875                                 IXGBE_SUCCESS) {
4876                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4877                         /* ignore error */
4878                         return;
4879                 }
4880         }
4881
4882         /* Restart link */
4883         IXGBE_WRITE_REG(hw,
4884                         IXGBE_AUTOC,
4885                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4886         ixgbe_reset_pipeline_82599(hw);
4887
4888         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4889         msec_delay(50);
4890 }
4891
4892
4893 /*
4894  * Start Transmit and Receive Units.
4895  */
4896 int __attribute__((cold))
4897 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4898 {
4899         struct ixgbe_hw     *hw;
4900         struct ixgbe_tx_queue *txq;
4901         struct ixgbe_rx_queue *rxq;
4902         uint32_t txdctl;
4903         uint32_t dmatxctl;
4904         uint32_t rxctrl;
4905         uint16_t i;
4906         int ret = 0;
4907
4908         PMD_INIT_FUNC_TRACE();
4909         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4910
4911         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4912                 txq = dev->data->tx_queues[i];
4913                 /* Setup Transmit Threshold Registers */
4914                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4915                 txdctl |= txq->pthresh & 0x7F;
4916                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4917                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4918                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4919         }
4920
4921         if (hw->mac.type != ixgbe_mac_82598EB) {
4922                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4923                 dmatxctl |= IXGBE_DMATXCTL_TE;
4924                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4925         }
4926
4927         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4928                 txq = dev->data->tx_queues[i];
4929                 if (!txq->tx_deferred_start) {
4930                         ret = ixgbe_dev_tx_queue_start(dev, i);
4931                         if (ret < 0)
4932                                 return ret;
4933                 }
4934         }
4935
4936         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4937                 rxq = dev->data->rx_queues[i];
4938                 if (!rxq->rx_deferred_start) {
4939                         ret = ixgbe_dev_rx_queue_start(dev, i);
4940                         if (ret < 0)
4941                                 return ret;
4942                 }
4943         }
4944
4945         /* Enable Receive engine */
4946         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4947         if (hw->mac.type == ixgbe_mac_82598EB)
4948                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4949         rxctrl |= IXGBE_RXCTRL_RXEN;
4950         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4951
4952         /* If loopback mode is enabled for 82599, set up the link accordingly */
4953         if (hw->mac.type == ixgbe_mac_82599EB &&
4954                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4955                 ixgbe_setup_loopback_link_82599(hw);
4956
4957         return 0;
4958 }
4959
4960 /*
4961  * Start Receive Units for specified queue.
4962  */
4963 int __attribute__((cold))
4964 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4965 {
4966         struct ixgbe_hw     *hw;
4967         struct ixgbe_rx_queue *rxq;
4968         uint32_t rxdctl;
4969         int poll_ms;
4970
4971         PMD_INIT_FUNC_TRACE();
4972         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4973
4974         if (rx_queue_id < dev->data->nb_rx_queues) {
4975                 rxq = dev->data->rx_queues[rx_queue_id];
4976
4977                 /* Allocate buffers for descriptor rings */
4978                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4979                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4980                                      rx_queue_id);
4981                         return -1;
4982                 }
4983                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4984                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4985                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4986
4987                 /* Wait until RX Enable ready */
4988                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
4989                 do {
4990                         rte_delay_ms(1);
4991                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4992                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
4993                 if (!poll_ms)
4994                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
4995                                      rx_queue_id);
4996                 rte_wmb();
4997                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4998                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
4999                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5000         } else
5001                 return -1;
5002
5003         return 0;
5004 }
5005
5006 /*
5007  * Stop Receive Units for specified queue.
5008  */
5009 int __attribute__((cold))
5010 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5011 {
5012         struct ixgbe_hw     *hw;
5013         struct ixgbe_adapter *adapter =
5014                 (struct ixgbe_adapter *)dev->data->dev_private;
5015         struct ixgbe_rx_queue *rxq;
5016         uint32_t rxdctl;
5017         int poll_ms;
5018
5019         PMD_INIT_FUNC_TRACE();
5020         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5021
5022         if (rx_queue_id < dev->data->nb_rx_queues) {
5023                 rxq = dev->data->rx_queues[rx_queue_id];
5024
5025                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5026                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5027                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5028
5029                 /* Wait until RX Enable bit clear */
5030                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5031                 do {
5032                         rte_delay_ms(1);
5033                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5034                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5035                 if (!poll_ms)
5036                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5037                                      rx_queue_id);
5038
5039                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5040
5041                 ixgbe_rx_queue_release_mbufs(rxq);
5042                 ixgbe_reset_rx_queue(adapter, rxq);
5043                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5044         } else
5045                 return -1;
5046
5047         return 0;
5048 }
5049
5050
5051 /*
5052  * Start Transmit Units for specified queue.
5053  */
5054 int __attribute__((cold))
5055 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5056 {
5057         struct ixgbe_hw     *hw;
5058         struct ixgbe_tx_queue *txq;
5059         uint32_t txdctl;
5060         int poll_ms;
5061
5062         PMD_INIT_FUNC_TRACE();
5063         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5064
5065         if (tx_queue_id < dev->data->nb_tx_queues) {
5066                 txq = dev->data->tx_queues[tx_queue_id];
5067                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5068                 txdctl |= IXGBE_TXDCTL_ENABLE;
5069                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5070
5071                 /* Wait until TX Enable ready */
5072                 if (hw->mac.type == ixgbe_mac_82599EB) {
5073                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5074                         do {
5075                                 rte_delay_ms(1);
5076                                 txdctl = IXGBE_READ_REG(hw,
5077                                         IXGBE_TXDCTL(txq->reg_idx));
5078                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5079                         if (!poll_ms)
5080                                 PMD_INIT_LOG(ERR, "Could not enable "
5081                                              "Tx Queue %d", tx_queue_id);
5082                 }
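                     /*
                      * Reset head and tail to 0 so the ring starts out empty; the
                      * write barrier orders any earlier ring updates before the
                      * pointers are re-armed.
                      */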
5083                 rte_wmb();
5084                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5085                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5086                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5087         } else
5088                 return -1;
5089
5090         return 0;
5091 }
5092
5093 /*
5094  * Stop Transmit Units for specified queue.
5095  */
5096 int __attribute__((cold))
5097 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5098 {
5099         struct ixgbe_hw     *hw;
5100         struct ixgbe_tx_queue *txq;
5101         uint32_t txdctl;
5102         uint32_t txtdh, txtdt;
5103         int poll_ms;
5104
5105         PMD_INIT_FUNC_TRACE();
5106         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5107
5108         if (tx_queue_id >= dev->data->nb_tx_queues)
5109                 return -1;
5110
5111         txq = dev->data->tx_queues[tx_queue_id];
5112
5113         /* Wait until TX queue is empty */
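             /*
              * TDH catches up with TDT once the hardware has fetched every queued
              * descriptor, so equal head and tail pointers indicate an empty queue.
              */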
5114         if (hw->mac.type == ixgbe_mac_82599EB) {
5115                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5116                 do {
5117                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5118                         txtdh = IXGBE_READ_REG(hw,
5119                                                IXGBE_TDH(txq->reg_idx));
5120                         txtdt = IXGBE_READ_REG(hw,
5121                                                IXGBE_TDT(txq->reg_idx));
5122                 } while (--poll_ms && (txtdh != txtdt));
5123                 if (!poll_ms)
5124                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5125                                      "when stopping.", tx_queue_id);
5126         }
5127
5128         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5129         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5130         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5131
5132         /* Wait until TX Enable bit clear */
5133         if (hw->mac.type == ixgbe_mac_82599EB) {
5134                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5135                 do {
5136                         rte_delay_ms(1);
5137                         txdctl = IXGBE_READ_REG(hw,
5138                                                 IXGBE_TXDCTL(txq->reg_idx));
5139                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5140                 if (!poll_ms)
5141                         PMD_INIT_LOG(ERR, "Could not disable "
5142                                      "Tx Queue %d", tx_queue_id);
5143         }
5144
5145         if (txq->ops != NULL) {
5146                 txq->ops->release_mbufs(txq);
5147                 txq->ops->reset(txq);
5148         }
5149         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5150
5151         return 0;
5152 }
5153
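     /*
      * The two queue-info helpers below back the rxq_info_get/txq_info_get
      * dev_ops callbacks used by rte_eth_rx_queue_info_get() and
      * rte_eth_tx_queue_info_get(). Illustrative application usage only
      * (not part of this driver):
      *
      *     struct rte_eth_rxq_info qinfo;
      *
      *     if (rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo) == 0)
      *         printf("rx_free_thresh=%u\n", qinfo.conf.rx_free_thresh);
      */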
5154 void
5155 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5156         struct rte_eth_rxq_info *qinfo)
5157 {
5158         struct ixgbe_rx_queue *rxq;
5159
5160         rxq = dev->data->rx_queues[queue_id];
5161
5162         qinfo->mp = rxq->mb_pool;
5163         qinfo->scattered_rx = dev->data->scattered_rx;
5164         qinfo->nb_desc = rxq->nb_rx_desc;
5165
5166         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5167         qinfo->conf.rx_drop_en = rxq->drop_en;
5168         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5169 }
5170
5171 void
5172 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5173         struct rte_eth_txq_info *qinfo)
5174 {
5175         struct ixgbe_tx_queue *txq;
5176
5177         txq = dev->data->tx_queues[queue_id];
5178
5179         qinfo->nb_desc = txq->nb_tx_desc;
5180
5181         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5182         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5183         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5184
5185         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5186         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5187         qinfo->conf.txq_flags = txq->txq_flags;
5188         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5189 }
5190
5191 /*
5192  * [VF] Initializes Receive Unit.
5193  */
5194 int __attribute__((cold))
5195 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5196 {
5197         struct ixgbe_hw     *hw;
5198         struct ixgbe_rx_queue *rxq;
5199         uint64_t bus_addr;
5200         uint32_t srrctl, psrtype = 0;
5201         uint16_t buf_size;
5202         uint16_t i;
5203         int ret;
5204
5205         PMD_INIT_FUNC_TRACE();
5206         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5207
5208         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5209                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5210                         "it must be a power of 2");
5211                 return -1;
5212         }
5213
5214         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5215                 PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5216                         "it must be less than or equal to %d",
5217                         hw->mac.max_rx_queues);
5218                 return -1;
5219         }
5220
5221         /*
5222          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5223          * disables VF receipt of packets if the PF MTU is > 1500.  This is
5224          * done to cope with a limitation of the 82599 that forces the PF and
5225          * all VFs to share the same MTU.
5226          * The PF driver re-enables VF packet receipt only when the VF driver
5227          * issues an IXGBE_VF_SET_LPE request (i.e. sets the VF maximum packet
5228          * length).  In the meantime, the VF device cannot be used, even if the
5229          * VF driver and the Guest VM network stack are ready to accept packets
5230          * with a size up to the PF MTU.
5231          * As a workaround to this PF behaviour, force the call to
5232          * ixgbevf_rlpml_set_vf even if jumbo frames are not used.  This way,
5233          * VF packet reception works in all cases.
5234          */
5235         ixgbevf_rlpml_set_vf(hw,
5236                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5237
5238         /* Setup RX queues */
5239         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5240                 rxq = dev->data->rx_queues[i];
5241
5242                 /* Allocate buffers for descriptor rings */
5243                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5244                 if (ret)
5245                         return ret;
5246
5247                 /* Setup the Base and Length of the Rx Descriptor Rings */
5248                 bus_addr = rxq->rx_ring_phys_addr;
5249
5250                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5251                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5252                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5253                                 (uint32_t)(bus_addr >> 32));
5254                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5255                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5256                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5257                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5258
5259
5260                 /* Configure the SRRCTL register */
5261 #ifdef RTE_HEADER_SPLIT_ENABLE
5262                 /*
5263                  * Configure Header Split
5264                  */
5265                 if (dev->data->dev_conf.rxmode.header_split) {
5266                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5267                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5268                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5269                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5270                 } else
5271 #endif
5272                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5273
5274                 /* Drop packets when no Rx descriptors are available, if enabled */
5275                 if (rxq->drop_en)
5276                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5277
5278                 /*
5279                  * Configure the RX buffer size in the BSIZEPACKET field of
5280                  * the SRRCTL register of the queue.
5281                  * The value is expressed in 1 KB resolution; valid values
5282                  * range from 1 KB to 16 KB.
5283                  */
5284                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5285                         RTE_PKTMBUF_HEADROOM);
5286                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5287                            IXGBE_SRRCTL_BSIZEPKT_MASK);
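                     /*
                      * For example, assuming the common 2 KB data room (a pool
                      * created with RTE_MBUF_DEFAULT_BUF_SIZE and the default
                      * 128-byte headroom), buf_size is 2048 and 2048 >> 10 gives
                      * BSIZEPACKET = 2, i.e. a 2 KB hardware receive buffer.
                      */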
5288
5289                 /*
5290                  * VF modification to write virtual function SRRCTL register
5291                  */
5292                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5293
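                     /*
                      * Re-derive the buffer size actually programmed (rounded down
                      * to 1 KB granularity) and use it to decide whether scattered
                      * Rx is required for the configured maximum packet length.
                      */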
5294                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5295                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5296
5297                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5298                     /* Add room for two VLAN tags to support dual VLAN (QinQ) */
5299                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5300                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5301                         if (!dev->data->scattered_rx)
5302                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5303                         dev->data->scattered_rx = 1;
5304                 }
5305         }
5306
5307 #ifdef RTE_HEADER_SPLIT_ENABLE
5308         if (dev->data->dev_conf.rxmode.header_split)
5309                 /* Must setup the PSRTYPE register */
5310                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5311                         IXGBE_PSRTYPE_UDPHDR   |
5312                         IXGBE_PSRTYPE_IPV4HDR  |
5313                         IXGBE_PSRTYPE_IPV6HDR;
5314 #endif
5315
5316         /* Set RQPL for VF RSS according to the number of Rx queues */
5317         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5318                 IXGBE_PSRTYPE_RQPL_SHIFT;
5319         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5320
5321         ixgbe_set_rx_function(dev);
5322
5323         return 0;
5324 }
5325
5326 /*
5327  * [VF] Initializes Transmit Unit.
5328  */
5329 void __attribute__((cold))
5330 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5331 {
5332         struct ixgbe_hw     *hw;
5333         struct ixgbe_tx_queue *txq;
5334         uint64_t bus_addr;
5335         uint32_t txctrl;
5336         uint16_t i;
5337
5338         PMD_INIT_FUNC_TRACE();
5339         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5340
5341         /* Setup the Base and Length of the Tx Descriptor Rings */
5342         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5343                 txq = dev->data->tx_queues[i];
5344                 bus_addr = txq->tx_ring_phys_addr;
5345                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5346                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5347                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5348                                 (uint32_t)(bus_addr >> 32));
5349                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5350                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5351                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5352                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5353                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5354
5355                 /*
5356                  * Disable the Tx head write-back RO (relaxed ordering) bit,
5357                  * since it breaks bookkeeping if write-backs arrive out of order.
5358                  */
5359                 txctrl = IXGBE_READ_REG(hw,
5360                                 IXGBE_VFDCA_TXCTRL(i));
5361                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5362                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5363                                 txctrl);
5364         }
5365 }
5366
5367 /*
5368  * [VF] Start Transmit and Receive Units.
5369  */
5370 void __attribute__((cold))
5371 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5372 {
5373         struct ixgbe_hw     *hw;
5374         struct ixgbe_tx_queue *txq;
5375         struct ixgbe_rx_queue *rxq;
5376         uint32_t txdctl;
5377         uint32_t rxdctl;
5378         uint16_t i;
5379         int poll_ms;
5380
5381         PMD_INIT_FUNC_TRACE();
5382         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5383
5384         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5385                 txq = dev->data->tx_queues[i];
5386                 /* Setup Transmit Threshold Registers */
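                     /*
                      * PTHRESH lives in TXDCTL bits [6:0], HTHRESH in [14:8] and
                      * WTHRESH in [22:16].
                      */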
5387                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5388                 txdctl |= txq->pthresh & 0x7F;
5389                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5390                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5391                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5392         }
5393
5394         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5395
5396                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5397                 txdctl |= IXGBE_TXDCTL_ENABLE;
5398                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5399
5400                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5401                 /* Wait until TX Enable ready */
5402                 do {
5403                         rte_delay_ms(1);
5404                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5405                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5406                 if (!poll_ms)
5407                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5408         }
5409         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5410
5411                 rxq = dev->data->rx_queues[i];
5412
5413                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5414                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5415                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5416
5417                 /* Wait until RX Enable ready */
5418                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5419                 do {
5420                         rte_delay_ms(1);
5421                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5422                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5423                 if (!poll_ms)
5424                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5425                 rte_wmb();
5426                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5427
5428         }
5429 }
5430
5431 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
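     /*
      * Returning an error (or zero packets) from these weak stubs keeps the
      * vector Rx path disabled, so the driver falls back to the scalar receive
      * routines when the vector code is not compiled in.
      */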
5432 int __attribute__((weak))
5433 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5434 {
5435         return -1;
5436 }
5437
5438 uint16_t __attribute__((weak))
5439 ixgbe_recv_pkts_vec(
5440         void __rte_unused *rx_queue,
5441         struct rte_mbuf __rte_unused **rx_pkts,
5442         uint16_t __rte_unused nb_pkts)
5443 {
5444         return 0;
5445 }
5446
5447 uint16_t __attribute__((weak))
5448 ixgbe_recv_scattered_pkts_vec(
5449         void __rte_unused *rx_queue,
5450         struct rte_mbuf __rte_unused **rx_pkts,
5451         uint16_t __rte_unused nb_pkts)
5452 {
5453         return 0;
5454 }
5455
5456 int __attribute__((weak))
5457 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5458 {
5459         return -1;
5460 }