net/ixgbe: fix TC bandwidth setting
dpdk.git: drivers/net/ixgbe/ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit mask to indicate which bits are required for building the TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
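/*
 * PKT_TX_OFFLOAD_MASK covers every mbuf TX offload flag, so the XOR above
 * leaves exactly the flags this PMD cannot handle; ixgbe_prep_pkts() uses
 * this mask to reject such packets.
 */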
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 /*********************************************************************
115  *
116  *  TX functions
117  *
118  **********************************************************************/
119
120 /*
121  * Check for descriptors with their DD bit set and free mbufs.
122  * Return the total number of buffers freed.
123  */
124 static inline int __attribute__((always_inline))
125 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
126 {
127         struct ixgbe_tx_entry *txep;
128         uint32_t status;
129         int i, nb_free = 0;
130         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
131
132         /* check DD bit on threshold descriptor */
133         status = txq->tx_ring[txq->tx_next_dd].wb.status;
134         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
135                 return 0;
136
137         /*
138          * first buffer to free from S/W ring is at index
139          * tx_next_dd - (tx_rs_thresh-1)
140          */
141         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
142
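        /*
         * Gather the freed mbufs in free[] and hand them back to their
         * mempool in bulk; the batch is flushed early whenever it fills up
         * or the next mbuf belongs to a different mempool.
         */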
143         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
144                 /* free buffers one at a time */
145                 m = __rte_pktmbuf_prefree_seg(txep->mbuf);
146                 txep->mbuf = NULL;
147
148                 if (unlikely(m == NULL))
149                         continue;
150
151                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
152                     (nb_free > 0 && m->pool != free[0]->pool)) {
153                         rte_mempool_put_bulk(free[0]->pool,
154                                              (void **)free, nb_free);
155                         nb_free = 0;
156                 }
157
158                 free[nb_free++] = m;
159         }
160
161         if (nb_free > 0)
162                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
163
164         /* buffers were freed, update counters */
165         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
166         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
167         if (txq->tx_next_dd >= txq->nb_tx_desc)
168                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
169
170         return txq->tx_rs_thresh;
171 }
172
173 /* Populate 4 descriptors with data from 4 mbufs */
174 static inline void
175 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
176 {
177         uint64_t buf_dma_addr;
178         uint32_t pkt_len;
179         int i;
180
181         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
182                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
183                 pkt_len = (*pkts)->data_len;
184
185                 /* write data to descriptor */
186                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
187
188                 txdp->read.cmd_type_len =
189                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
190
191                 txdp->read.olinfo_status =
192                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
193
194                 rte_prefetch0(&(*pkts)->pool);
195         }
196 }
197
198 /* Populate 1 descriptor with data from 1 mbuf */
199 static inline void
200 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
201 {
202         uint64_t buf_dma_addr;
203         uint32_t pkt_len;
204
205         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
206         pkt_len = (*pkts)->data_len;
207
208         /* write data to descriptor */
209         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
210         txdp->read.cmd_type_len =
211                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
212         txdp->read.olinfo_status =
213                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
214         rte_prefetch0(&(*pkts)->pool);
215 }
216
217 /*
218  * Fill H/W descriptor ring with mbuf data.
219  * Copy mbuf pointers to the S/W ring.
220  */
221 static inline void
222 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
223                       uint16_t nb_pkts)
224 {
225         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
226         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
227         const int N_PER_LOOP = 4;
228         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
229         int mainpart, leftover;
230         int i, j;
231
232         /*
233          * Process most of the packets in chunks of N pkts.  Any
234          * leftover packets will get processed one at a time.
235          */
236         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
237         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
238         for (i = 0; i < mainpart; i += N_PER_LOOP) {
239                 /* Copy N mbuf pointers to the S/W ring */
240                 for (j = 0; j < N_PER_LOOP; ++j) {
241                         (txep + i + j)->mbuf = *(pkts + i + j);
242                 }
243                 tx4(txdp + i, pkts + i);
244         }
245
246         if (unlikely(leftover > 0)) {
247                 for (i = 0; i < leftover; ++i) {
248                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
249                         tx1(txdp + mainpart + i, pkts + mainpart + i);
250                 }
251         }
252 }
253
254 static inline uint16_t
255 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
256              uint16_t nb_pkts)
257 {
258         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
259         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
260         uint16_t n = 0;
261
262         /*
263          * Begin scanning the H/W ring for done descriptors when the
264          * number of available descriptors drops below tx_free_thresh.  For
265          * each done descriptor, free the associated buffer.
266          */
267         if (txq->nb_tx_free < txq->tx_free_thresh)
268                 ixgbe_tx_free_bufs(txq);
269
270         /* Only use descriptors that are available */
271         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
272         if (unlikely(nb_pkts == 0))
273                 return 0;
274
275         /* Use exactly nb_pkts descriptors */
276         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
277
278         /*
279          * At this point, we know there are enough descriptors in the
280          * ring to transmit all the packets.  This assumes that each
281          * mbuf contains a single segment, and that no new offloads
282          * are expected, which would require a new context descriptor.
283          */
284
285         /*
286          * See if we're going to wrap-around. If so, handle the top
287          * of the descriptor ring first, then do the bottom.  If not,
288          * the processing looks just like the "bottom" part anyway...
289          */
290         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
291                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
292                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
293
294                 /*
295                  * We know that the last descriptor in the ring will need to
296                  * have its RS bit set because tx_rs_thresh has to be
297                  * a divisor of the ring size
298                  */
299                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
300                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
301                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
302
303                 txq->tx_tail = 0;
304         }
305
306         /* Fill H/W descriptor ring with mbuf data */
307         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
308         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
309
310         /*
311          * Determine if RS bit should be set
312          * This is what we actually want:
313          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
314          * but instead of subtracting 1 and doing >=, we can just do
315          * greater than without subtracting.
316          */
317         if (txq->tx_tail > txq->tx_next_rs) {
318                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
319                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
320                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
321                                                 txq->tx_rs_thresh);
322                 if (txq->tx_next_rs >= txq->nb_tx_desc)
323                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
324         }
325
326         /*
327          * Check for wrap-around. This would only happen if we used
328          * up to the last descriptor in the ring, no more, no less.
329          */
330         if (txq->tx_tail >= txq->nb_tx_desc)
331                 txq->tx_tail = 0;
332
333         /* update tail pointer */
334         rte_wmb();
335         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
336
337         return nb_pkts;
338 }
339
340 uint16_t
341 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
342                        uint16_t nb_pkts)
343 {
344         uint16_t nb_tx;
345
346         /* Transmit the burst directly if it fits within TX_MAX_BURST pkts */
347         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
348                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
349
350         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
351         nb_tx = 0;
352         while (nb_pkts) {
353                 uint16_t ret, n;
354
355                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
356                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
357                 nb_tx = (uint16_t)(nb_tx + ret);
358                 nb_pkts = (uint16_t)(nb_pkts - ret);
359                 if (ret < n)
360                         break;
361         }
362
363         return nb_tx;
364 }
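/*
 * Note: as the comment in tx_xmit_pkts() explains, this simple path assumes
 * single-segment mbufs with no offloads that would need a context
 * descriptor; such traffic must go through ixgbe_xmit_pkts() instead.
 */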
365
366 static inline void
367 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
368                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
369                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
370 {
371         uint32_t type_tucmd_mlhl;
372         uint32_t mss_l4len_idx = 0;
373         uint32_t ctx_idx;
374         uint32_t vlan_macip_lens;
375         union ixgbe_tx_offload tx_offload_mask;
376         uint32_t seqnum_seed = 0;
377
378         ctx_idx = txq->ctx_curr;
379         tx_offload_mask.data[0] = 0;
380         tx_offload_mask.data[1] = 0;
381         type_tucmd_mlhl = 0;
382
383         /* Specify which HW CTX to upload. */
384         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
385
386         if (ol_flags & PKT_TX_VLAN_PKT) {
387                 tx_offload_mask.vlan_tci |= ~0;
388         }
389
390         /* check if TCP segmentation is required for this packet */
391         if (ol_flags & PKT_TX_TCP_SEG) {
392                 /* implies IP cksum in IPv4 */
393                 if (ol_flags & PKT_TX_IP_CKSUM)
394                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
395                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
396                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
397                 else
398                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
399                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
400                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
401
402                 tx_offload_mask.l2_len |= ~0;
403                 tx_offload_mask.l3_len |= ~0;
404                 tx_offload_mask.l4_len |= ~0;
405                 tx_offload_mask.tso_segsz |= ~0;
406                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
407                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
408         } else { /* no TSO, check if hardware checksum is needed */
409                 if (ol_flags & PKT_TX_IP_CKSUM) {
410                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
411                         tx_offload_mask.l2_len |= ~0;
412                         tx_offload_mask.l3_len |= ~0;
413                 }
414
415                 switch (ol_flags & PKT_TX_L4_MASK) {
416                 case PKT_TX_UDP_CKSUM:
417                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
418                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
419                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
420                         tx_offload_mask.l2_len |= ~0;
421                         tx_offload_mask.l3_len |= ~0;
422                         break;
423                 case PKT_TX_TCP_CKSUM:
424                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
427                         tx_offload_mask.l2_len |= ~0;
428                         tx_offload_mask.l3_len |= ~0;
429                         break;
430                 case PKT_TX_SCTP_CKSUM:
431                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
432                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
433                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
434                         tx_offload_mask.l2_len |= ~0;
435                         tx_offload_mask.l3_len |= ~0;
436                         break;
437                 default:
438                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
439                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
440                         break;
441                 }
442         }
443
444         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
445                 tx_offload_mask.outer_l2_len |= ~0;
446                 tx_offload_mask.outer_l3_len |= ~0;
447                 tx_offload_mask.l2_len |= ~0;
448                 seqnum_seed |= tx_offload.outer_l3_len
449                                << IXGBE_ADVTXD_OUTER_IPLEN;
450                 seqnum_seed |= tx_offload.l2_len
451                                << IXGBE_ADVTXD_TUNNEL_LEN;
452         }
453
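        /*
         * Cache only the offload fields relevant to the requested flags:
         * tx_offload_mask has bits set just for those fields, so everything
         * else is zeroed before being stored and later compared in
         * what_advctx_update().
         */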
454         txq->ctx_cache[ctx_idx].flags = ol_flags;
455         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
456                 tx_offload_mask.data[0] & tx_offload.data[0];
457         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
458                 tx_offload_mask.data[1] & tx_offload.data[1];
459         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
460
461         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
462         vlan_macip_lens = tx_offload.l3_len;
463         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
464                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
465                                     IXGBE_ADVTXD_MACLEN_SHIFT);
466         else
467                 vlan_macip_lens |= (tx_offload.l2_len <<
468                                     IXGBE_ADVTXD_MACLEN_SHIFT);
469         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
470         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
471         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
472         ctx_txd->seqnum_seed     = seqnum_seed;
473 }
474
475 /*
476  * Check which hardware context can be used. Use the existing match
477  * or create a new context descriptor.
478  */
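/*
 * The queue caches two context slots and txq->ctx_curr toggles between them.
 * Returning IXGBE_CTX_NUM means neither cached slot matches and the caller
 * must program a new context descriptor.
 */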
479 static inline uint32_t
480 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
481                    union ixgbe_tx_offload tx_offload)
482 {
483         /* If it matches the currently used context */
484         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
485                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
486                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
487                      & tx_offload.data[0])) &&
488                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
489                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
490                      & tx_offload.data[1]))))
491                 return txq->ctx_curr;
492
493         /* Otherwise, check whether the other cached context matches */
494         txq->ctx_curr ^= 1;
495         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
496                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
497                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
498                      & tx_offload.data[0])) &&
499                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
500                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
501                      & tx_offload.data[1]))))
502                 return txq->ctx_curr;
503
504         /* Neither cached context matches: a new one must be built */
505         return IXGBE_CTX_NUM;
506 }
507
508 static inline uint32_t
509 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
510 {
511         uint32_t tmp = 0;
512
513         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
514                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
515         if (ol_flags & PKT_TX_IP_CKSUM)
516                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
517         if (ol_flags & PKT_TX_TCP_SEG)
518                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
519         return tmp;
520 }
521
522 static inline uint32_t
523 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
524 {
525         uint32_t cmdtype = 0;
526
527         if (ol_flags & PKT_TX_VLAN_PKT)
528                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
529         if (ol_flags & PKT_TX_TCP_SEG)
530                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
531         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
532                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
533         if (ol_flags & PKT_TX_MACSEC)
534                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
535         return cmdtype;
536 }
537
538 /* Default RS bit threshold values */
539 #ifndef DEFAULT_TX_RS_THRESH
540 #define DEFAULT_TX_RS_THRESH   32
541 #endif
542 #ifndef DEFAULT_TX_FREE_THRESH
543 #define DEFAULT_TX_FREE_THRESH 32
544 #endif
545
546 /* Reset transmit descriptors after they have been used */
547 static inline int
548 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
549 {
550         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
551         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
552         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
553         uint16_t nb_tx_desc = txq->nb_tx_desc;
554         uint16_t desc_to_clean_to;
555         uint16_t nb_tx_to_clean;
556         uint32_t status;
557
558         /* Determine the last descriptor needing to be cleaned */
559         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
560         if (desc_to_clean_to >= nb_tx_desc)
561                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
562
563         /* Check to make sure the last descriptor to clean is done */
564         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
565         status = txr[desc_to_clean_to].wb.status;
566         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
567                 PMD_TX_FREE_LOG(DEBUG,
568                                 "TX descriptor %4u is not done "
569                                 "(port=%d queue=%d)",
570                                 desc_to_clean_to,
571                                 txq->port_id, txq->queue_id);
572                 /* Failed to clean any descriptors, better luck next time */
573                 return -(1);
574         }
575
576         /* Figure out how many descriptors will be cleaned */
577         if (last_desc_cleaned > desc_to_clean_to)
578                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
579                                                         desc_to_clean_to);
580         else
581                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
582                                                 last_desc_cleaned);
583
584         PMD_TX_FREE_LOG(DEBUG,
585                         "Cleaning %4u TX descriptors: %4u to %4u "
586                         "(port=%d queue=%d)",
587                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
588                         txq->port_id, txq->queue_id);
589
590         /*
591          * The last descriptor to clean is done, so that means all the
592          * descriptors from the last descriptor that was cleaned
593          * up to the last descriptor with the RS bit set
594          * are done. Only reset the threshold descriptor.
595          */
596         txr[desc_to_clean_to].wb.status = 0;
597
598         /* Update the txq to reflect the last descriptor that was cleaned */
599         txq->last_desc_cleaned = desc_to_clean_to;
600         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
601
602         /* No Error */
603         return 0;
604 }
605
606 uint16_t
607 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
608                 uint16_t nb_pkts)
609 {
610         struct ixgbe_tx_queue *txq;
611         struct ixgbe_tx_entry *sw_ring;
612         struct ixgbe_tx_entry *txe, *txn;
613         volatile union ixgbe_adv_tx_desc *txr;
614         volatile union ixgbe_adv_tx_desc *txd, *txp;
615         struct rte_mbuf     *tx_pkt;
616         struct rte_mbuf     *m_seg;
617         uint64_t buf_dma_addr;
618         uint32_t olinfo_status;
619         uint32_t cmd_type_len;
620         uint32_t pkt_len;
621         uint16_t slen;
622         uint64_t ol_flags;
623         uint16_t tx_id;
624         uint16_t tx_last;
625         uint16_t nb_tx;
626         uint16_t nb_used;
627         uint64_t tx_ol_req;
628         uint32_t ctx = 0;
629         uint32_t new_ctx;
630         union ixgbe_tx_offload tx_offload;
631
632         tx_offload.data[0] = 0;
633         tx_offload.data[1] = 0;
634         txq = tx_queue;
635         sw_ring = txq->sw_ring;
636         txr     = txq->tx_ring;
637         tx_id   = txq->tx_tail;
638         txe = &sw_ring[tx_id];
639         txp = NULL;
640
641         /* Determine if the descriptor ring needs to be cleaned. */
642         if (txq->nb_tx_free < txq->tx_free_thresh)
643                 ixgbe_xmit_cleanup(txq);
644
645         rte_prefetch0(&txe->mbuf->pool);
646
647         /* TX loop */
648         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
649                 new_ctx = 0;
650                 tx_pkt = *tx_pkts++;
651                 pkt_len = tx_pkt->pkt_len;
652
653                 /*
654                  * Determine how many (if any) context descriptors
655                  * are needed for offload functionality.
656                  */
657                 ol_flags = tx_pkt->ol_flags;
658
659                 /* If hardware offload required */
660                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
661                 if (tx_ol_req) {
662                         tx_offload.l2_len = tx_pkt->l2_len;
663                         tx_offload.l3_len = tx_pkt->l3_len;
664                         tx_offload.l4_len = tx_pkt->l4_len;
665                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
666                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
667                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
668                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
669
670                         /* Decide whether a new context is needed or an existing one can be reused. */
671                         ctx = what_advctx_update(txq, tx_ol_req,
672                                 tx_offload);
673                         /* Only allocate a context descriptor if required */
674                         new_ctx = (ctx == IXGBE_CTX_NUM);
675                         ctx = txq->ctx_curr;
676                 }
677
678                 /*
679                  * Keep track of how many descriptors are used this loop.
680                  * This will always be the number of segments plus the number
681                  * of context descriptors required to transmit the packet.
682                  */
683                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
684
685                 if (txp != NULL &&
686                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
687                         /* set RS on the previous packet in the burst */
688                         txp->read.cmd_type_len |=
689                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
690
691                 /*
692                  * The number of descriptors that must be allocated for a
693                  * packet is the number of segments of that packet, plus 1
694                  * Context Descriptor for the hardware offload, if any.
695                  * Determine the last TX descriptor to allocate in the TX ring
696                  * for the packet, starting from the current position (tx_id)
697                  * in the ring.
698                  */
699                 tx_last = (uint16_t) (tx_id + nb_used - 1);
700
701                 /* Circular ring */
702                 if (tx_last >= txq->nb_tx_desc)
703                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
704
705                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
706                            " tx_first=%u tx_last=%u",
707                            (unsigned) txq->port_id,
708                            (unsigned) txq->queue_id,
709                            (unsigned) pkt_len,
710                            (unsigned) tx_id,
711                            (unsigned) tx_last);
712
713                 /*
714                  * Make sure there are enough TX descriptors available to
715                  * transmit the entire packet.
716                  * nb_used better be less than or equal to txq->tx_rs_thresh
717                  */
718                 if (nb_used > txq->nb_tx_free) {
719                         PMD_TX_FREE_LOG(DEBUG,
720                                         "Not enough free TX descriptors "
721                                         "nb_used=%4u nb_free=%4u "
722                                         "(port=%d queue=%d)",
723                                         nb_used, txq->nb_tx_free,
724                                         txq->port_id, txq->queue_id);
725
726                         if (ixgbe_xmit_cleanup(txq) != 0) {
727                                 /* Could not clean any descriptors */
728                                 if (nb_tx == 0)
729                                         return 0;
730                                 goto end_of_tx;
731                         }
732
733                         /* nb_used better be <= txq->tx_rs_thresh */
734                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
735                                 PMD_TX_FREE_LOG(DEBUG,
736                                         "The number of descriptors needed to "
737                                         "transmit the packet exceeds the "
738                                         "RS bit threshold. This will impact "
739                                         "performance. "
740                                         "nb_used=%4u nb_free=%4u "
741                                         "tx_rs_thresh=%4u. "
742                                         "(port=%d queue=%d)",
743                                         nb_used, txq->nb_tx_free,
744                                         txq->tx_rs_thresh,
745                                         txq->port_id, txq->queue_id);
746                                 /*
747                                  * Loop here until there are enough TX
748                                  * descriptors or until the ring cannot be
749                                  * cleaned.
750                                  */
751                                 while (nb_used > txq->nb_tx_free) {
752                                         if (ixgbe_xmit_cleanup(txq) != 0) {
753                                                 /*
754                                                  * Could not clean any
755                                                  * descriptors
756                                                  */
757                                                 if (nb_tx == 0)
758                                                         return 0;
759                                                 goto end_of_tx;
760                                         }
761                                 }
762                         }
763                 }
764
765                 /*
766                  * By now there are enough free TX descriptors to transmit
767                  * the packet.
768                  */
769
770                 /*
771                  * Set common flags of all TX Data Descriptors.
772                  *
773                  * The following bits must be set in all Data Descriptors:
774                  *   - IXGBE_ADVTXD_DTYP_DATA
775                  *   - IXGBE_ADVTXD_DCMD_DEXT
776                  *
777                  * The following bits must be set in the first Data Descriptor
778                  * and are ignored in the other ones:
779                  *   - IXGBE_ADVTXD_DCMD_IFCS
780                  *   - IXGBE_ADVTXD_MAC_1588
781                  *   - IXGBE_ADVTXD_DCMD_VLE
782                  *
783                  * The following bits must only be set in the last Data
784                  * Descriptor:
785                  *   - IXGBE_TXD_CMD_EOP
786                  *
787                  * The following bits can be set in any Data Descriptor, but
788                  * are only set in the last Data Descriptor:
789                  *   - IXGBE_TXD_CMD_RS
790                  */
791                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
792                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
793
794 #ifdef RTE_LIBRTE_IEEE1588
795                 if (ol_flags & PKT_TX_IEEE1588_TMST)
796                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
797 #endif
798
799                 olinfo_status = 0;
800                 if (tx_ol_req) {
801
802                         if (ol_flags & PKT_TX_TCP_SEG) {
803                                 /* when TSO is on, paylen in descriptor is
804                                  * not the packet len but the tcp payload len */
805                                 pkt_len -= (tx_offload.l2_len +
806                                         tx_offload.l3_len + tx_offload.l4_len);
807                         }
808
809                         /*
810                          * Setup the TX Advanced Context Descriptor if required
811                          */
812                         if (new_ctx) {
813                                 volatile struct ixgbe_adv_tx_context_desc *
814                                     ctx_txd;
815
816                                 ctx_txd = (volatile struct
817                                     ixgbe_adv_tx_context_desc *)
818                                     &txr[tx_id];
819
820                                 txn = &sw_ring[txe->next_id];
821                                 rte_prefetch0(&txn->mbuf->pool);
822
823                                 if (txe->mbuf != NULL) {
824                                         rte_pktmbuf_free_seg(txe->mbuf);
825                                         txe->mbuf = NULL;
826                                 }
827
828                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
829                                         tx_offload);
830
831                                 txe->last_id = tx_last;
832                                 tx_id = txe->next_id;
833                                 txe = txn;
834                         }
835
836                         /*
837                          * Set up the TX Advanced Data Descriptor.
838                          * This path is taken whether a new context
839                          * descriptor was built or an existing one is reused.
840                          */
841                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
842                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
843                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
844                 }
845
846                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
847
848                 m_seg = tx_pkt;
849                 do {
850                         txd = &txr[tx_id];
851                         txn = &sw_ring[txe->next_id];
852                         rte_prefetch0(&txn->mbuf->pool);
853
854                         if (txe->mbuf != NULL)
855                                 rte_pktmbuf_free_seg(txe->mbuf);
856                         txe->mbuf = m_seg;
857
858                         /*
859                          * Set up Transmit Data Descriptor.
860                          */
861                         slen = m_seg->data_len;
862                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
863                         txd->read.buffer_addr =
864                                 rte_cpu_to_le_64(buf_dma_addr);
865                         txd->read.cmd_type_len =
866                                 rte_cpu_to_le_32(cmd_type_len | slen);
867                         txd->read.olinfo_status =
868                                 rte_cpu_to_le_32(olinfo_status);
869                         txe->last_id = tx_last;
870                         tx_id = txe->next_id;
871                         txe = txn;
872                         m_seg = m_seg->next;
873                 } while (m_seg != NULL);
874
875                 /*
876                  * The last packet data descriptor needs End Of Packet (EOP)
877                  */
878                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
879                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
880                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
881
882                 /* Set RS bit only on threshold packets' last descriptor */
883                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
884                         PMD_TX_FREE_LOG(DEBUG,
885                                         "Setting RS bit on TXD id="
886                                         "%4u (port=%d queue=%d)",
887                                         tx_last, txq->port_id, txq->queue_id);
888
889                         cmd_type_len |= IXGBE_TXD_CMD_RS;
890
891                         /* Update txq RS bit counters */
892                         txq->nb_tx_used = 0;
893                         txp = NULL;
894                 } else
895                         txp = txd;
896
897                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
898         }
899
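        /*
         * RS is requested only every tx_rs_thresh descriptors to limit
         * descriptor write-backs; if the final packet of the burst did not
         * cross that threshold, txp still points at its last data descriptor
         * so the RS bit is forced below before bumping the tail.
         */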
900 end_of_tx:
901         /* set RS on last packet in the burst */
902         if (txp != NULL)
903                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
904
905         rte_wmb();
906
907         /*
908          * Set the Transmit Descriptor Tail (TDT)
909          */
910         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
911                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
912                    (unsigned) tx_id, (unsigned) nb_tx);
913         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
914         txq->tx_tail = tx_id;
915
916         return nb_tx;
917 }
918
919 /*********************************************************************
920  *
921  *  TX prep functions
922  *
923  **********************************************************************/
924 uint16_t
925 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
926 {
927         int i, ret;
928         uint64_t ol_flags;
929         struct rte_mbuf *m;
930         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
931
932         for (i = 0; i < nb_pkts; i++) {
933                 m = tx_pkts[i];
934                 ol_flags = m->ol_flags;
935
936                 /**
937                  * Check if packet meets requirements for number of segments
938                  *
939                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
940                  *       non-TSO
941                  */
942
943                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
944                         rte_errno = -EINVAL;
945                         return i;
946                 }
947
948                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
949                         rte_errno = -ENOTSUP;
950                         return i;
951                 }
952
953 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
954                 ret = rte_validate_tx_offload(m);
955                 if (ret != 0) {
956                         rte_errno = ret;
957                         return i;
958                 }
959 #endif
960                 ret = rte_net_intel_cksum_prepare(m);
961                 if (ret != 0) {
962                         rte_errno = ret;
963                         return i;
964                 }
965         }
966
967         return i;
968 }
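/*
 * ixgbe_prep_pkts() is installed as the tx_pkt_prepare callback for the
 * full-featured TX path, so applications normally reach it through
 * rte_eth_tx_prepare() just before the burst, roughly (illustrative
 * sketch only):
 *
 *     nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *     nb_sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
 *
 * On failure the index of the offending packet is returned and the cause
 * is left in rte_errno.
 */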
969
970 /*********************************************************************
971  *
972  *  RX functions
973  *
974  **********************************************************************/
975
976 #define IXGBE_PACKET_TYPE_ETHER                         0X00
977 #define IXGBE_PACKET_TYPE_IPV4                          0X01
978 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
979 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
980 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
981 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
982 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
983 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
984 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
985 #define IXGBE_PACKET_TYPE_IPV6                          0X04
986 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
987 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
988 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
989 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
990 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
991 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
992 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
993 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
994 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
995 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
996 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
997 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
998 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
999 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1000 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1001 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1002 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1003 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1004 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1005 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1006 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1007 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1008 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1009
1010 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1011 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1012 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1013 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1014 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1015 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1016 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1017 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1018 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1019 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1020 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1021 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1022 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1023 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1024 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1025 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1026 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1027 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1028 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1029 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1030 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1031 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1032 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1033
1034 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1035 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1036 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1037 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1038 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1039 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1040 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1041 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1042 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1043 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1044 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1045 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1046 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1047 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1048 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1049 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1050 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1051 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1052 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1053 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1054 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1055 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1056 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1057
1058 #define IXGBE_PACKET_TYPE_MAX               0X80
1059 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1060 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1061
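/*
 * The values above encode the L3/L4/tunnel combination in individual bits:
 * 0x01/0x03 for IPv4/IPv4+ext, 0x04/0x0C for IPv6/IPv6+ext, 0x10 TCP,
 * 0x20 UDP, 0x40 SCTP, and 0x80 marks the VXLAN variants.  They are used
 * directly as indexes into the lookup tables in
 * ixgbe_rxd_pkt_info_to_pkt_type() below.
 */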
1062 /* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1063 static inline uint32_t
1064 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1065 {
1066         /**
1067          * Use two different tables for normal packets and tunnel packets
1068          * to save space.
1069          */
1070         static const uint32_t
1071                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1072                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1073                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1074                         RTE_PTYPE_L3_IPV4,
1075                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1076                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1077                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1078                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1079                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1080                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1081                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1082                         RTE_PTYPE_L3_IPV4_EXT,
1083                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1084                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1085                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1086                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1087                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1088                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1089                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1090                         RTE_PTYPE_L3_IPV6,
1091                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1092                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1093                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1094                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1095                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1096                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1097                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1098                         RTE_PTYPE_L3_IPV6_EXT,
1099                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1100                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1101                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1102                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1103                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1104                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1105                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1106                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1107                         RTE_PTYPE_INNER_L3_IPV6,
1108                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1110                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1111                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1112                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1113                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1114                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1116                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1117                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1118                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1119                         RTE_PTYPE_INNER_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1122                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1123                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1124                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1125                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1126                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1128                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1129                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1130                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1131                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1132                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1134                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1135                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1137                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1138                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1139                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1140                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1141                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1142                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1143                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1144                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1145                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1146                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1147                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1148                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1149                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1150                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1151                         RTE_PTYPE_L2_ETHER |
1152                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1154         };
1155
1156         static const uint32_t
1157                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1158                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1160                         RTE_PTYPE_INNER_L2_ETHER,
1161                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1163                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1164                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1166                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1167                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1169                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1170                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1172                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1173                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1175                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1176                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1178                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1179                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1180                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1181                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1182                         RTE_PTYPE_INNER_L4_TCP,
1183                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1184                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1185                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1186                         RTE_PTYPE_INNER_L4_TCP,
1187                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1193                         RTE_PTYPE_INNER_L4_TCP,
1194                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1195                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1196                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1197                         RTE_PTYPE_INNER_L3_IPV4,
1198                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1199                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1201                         RTE_PTYPE_INNER_L4_UDP,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1205                         RTE_PTYPE_INNER_L4_UDP,
1206                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1207                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1208                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1209                         RTE_PTYPE_INNER_L4_SCTP,
1210                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1211                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1214                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1216                         RTE_PTYPE_INNER_L4_UDP,
1217                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1218                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1220                         RTE_PTYPE_INNER_L4_SCTP,
1221                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1222                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1223                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1224                         RTE_PTYPE_INNER_L3_IPV4,
1225                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1226                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1227                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1228                         RTE_PTYPE_INNER_L4_SCTP,
1229                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1230                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1231                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1232                         RTE_PTYPE_INNER_L4_SCTP,
1233                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1234                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1235                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1236                         RTE_PTYPE_INNER_L4_TCP,
1237                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1238                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1239                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1240                         RTE_PTYPE_INNER_L4_UDP,
1241
1242                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1244                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1245                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1246                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1247                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1248                         RTE_PTYPE_INNER_L3_IPV4,
1249                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1250                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1251                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1252                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1253                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1254                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1255                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1256                         RTE_PTYPE_INNER_L3_IPV6,
1257                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1258                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1259                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1260                         RTE_PTYPE_INNER_L3_IPV4,
1261                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1262                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1263                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1264                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1265                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1266                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1267                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1268                         RTE_PTYPE_INNER_L3_IPV4,
1269                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1270                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1271                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1272                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1273                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1274                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1275                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1276                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1277                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1278                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1279                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1280                         RTE_PTYPE_INNER_L3_IPV4,
1281                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1282                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1283                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1284                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1285                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1286                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1287                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1288                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1289                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1290                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1291                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1292                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1293                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1294                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1295                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1296                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1297                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1298                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1299                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1300                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1301                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1302                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1303                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1304                         RTE_PTYPE_INNER_L3_IPV4,
1305                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1306                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1307                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1308                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1309                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1310                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1311                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1312                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1313                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1314                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1315                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1316                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1317                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1318                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1319                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1320                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1321                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1322                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1323                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1324                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1325                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1326                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1327                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1328                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1329                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1330                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1331                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1332                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1333         };
1334
1335         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1336                 return RTE_PTYPE_UNKNOWN;
1337
1338         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1339
1340         /* For tunnel packets */
1341         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1342                 /* Remove the tunnel bit to save table space. */
1343                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1344                 return ptype_table_tn[pkt_info];
1345         }
1346
1347         /**
1348          * For x550, if the packet is not tunneled,
1349          * the tunnel type bit should be set to 0.
1350          * Reuse the 82599 mask.
1351          */
1352         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1353
1354         return ptype_table[pkt_info];
1355 }
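/*
 * A minimal sketch of how this helper is used (taken from the receive paths
 * below): the hardware packet-type field is shifted down and masked, the
 * tunnel bit selects between ptype_table_tn and ptype_table, and the result
 * is a single table lookup:
 *
 *     pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
 *     mb->packet_type =
 *             ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
 */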
1356
1357 static inline uint64_t
1358 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1359 {
1360         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1361                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1362                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1363                 PKT_RX_RSS_HASH, 0, 0, 0,
1364                 0, 0, 0,  PKT_RX_FDIR,
1365         };
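        /*
         * Note: the table above is indexed by the low 4 bits of pkt_info
         * (the descriptor's RSS-type bits).  Entries mapping to
         * PKT_RX_RSS_HASH are the hash-bearing RSS types, and index 15 is
         * reported as a flow-director match (PKT_RX_FDIR).
         */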
1366 #ifdef RTE_LIBRTE_IEEE1588
1367         static uint64_t ip_pkt_etqf_map[8] = {
1368                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1369                 0, 0, 0, 0,
1370         };
1371
1372         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1373                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1374                                 ip_rss_types_map[pkt_info & 0XF];
1375         else
1376                 return ip_rss_types_map[pkt_info & 0XF];
1377 #else
1378         return ip_rss_types_map[pkt_info & 0XF];
1379 #endif
1380 }
1381
1382 static inline uint64_t
1383 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1384 {
1385         uint64_t pkt_flags;
1386
1387         /*
1388          * Check only whether a VLAN is present.
1389          * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1390          * that can be found from the rte_eth_rxmode.hw_ip_checksum flag.
1391          */
1392         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1393
1394 #ifdef RTE_LIBRTE_IEEE1588
1395         if (rx_status & IXGBE_RXD_STAT_TMST)
1396                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1397 #endif
1398         return pkt_flags;
1399 }
1400
1401 static inline uint64_t
1402 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1403 {
1404         uint64_t pkt_flags;
1405
1406         /*
1407          * Bit 31: IPE, IPv4 checksum error
1408          * Bit 30: L4I, L4 integrity error
1409          */
1410         static uint64_t error_to_pkt_flags_map[4] = {
1411                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1412                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1413                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1414                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1415         };
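        /*
         * The 2-bit index extracted below follows the table layout above:
         * bit 0 of the index reflects the L4 checksum error and bit 1 the
         * IP checksum error, e.g. index 3 means both checksums are bad.
         */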
1416         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1417                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1418
1419         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1420             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1421                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1422         }
1423
1424         return pkt_flags;
1425 }
1426
1427 /*
1428  * LOOK_AHEAD defines how many desc statuses to check beyond the
1429  * current descriptor.
1430  * It must be a #define (compile-time constant) for optimal performance.
1431  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1432  * function only works with LOOK_AHEAD=8.
1433  */
1434 #define LOOK_AHEAD 8
1435 #if (LOOK_AHEAD != 8)
1436 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1437 #endif
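/*
 * Scan the Rx ring in groups of LOOK_AHEAD (8) descriptors, translating each
 * completed descriptor into its mbuf and staging the mbuf pointers in
 * rxq->rx_stage.  At most RTE_PMD_IXGBE_RX_MAX_BURST descriptors are examined
 * per call, and scanning stops early as soon as a group is not fully done.
 */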
1438 static inline int
1439 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1440 {
1441         volatile union ixgbe_adv_rx_desc *rxdp;
1442         struct ixgbe_rx_entry *rxep;
1443         struct rte_mbuf *mb;
1444         uint16_t pkt_len;
1445         uint64_t pkt_flags;
1446         int nb_dd;
1447         uint32_t s[LOOK_AHEAD];
1448         uint32_t pkt_info[LOOK_AHEAD];
1449         int i, j, nb_rx = 0;
1450         uint32_t status;
1451         uint64_t vlan_flags = rxq->vlan_flags;
1452
1453         /* get references to current descriptor and S/W ring entry */
1454         rxdp = &rxq->rx_ring[rxq->rx_tail];
1455         rxep = &rxq->sw_ring[rxq->rx_tail];
1456
1457         status = rxdp->wb.upper.status_error;
1458         /* check to make sure there is at least 1 packet to receive */
1459         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1460                 return 0;
1461
1462         /*
1463          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1464          * reference packets that are ready to be received.
1465          */
1466         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1467              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1468                 /* Read desc statuses; rte_smp_rmb() below orders them before the data reads */
1469                 for (j = 0; j < LOOK_AHEAD; j++)
1470                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1471
1472                 rte_smp_rmb();
1473
1474                 /* Compute how many status bits were set */
1475                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1476                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1477                         ;
1478
1479                 for (j = 0; j < nb_dd; j++)
1480                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1481                                                        lo_dword.data);
1482
1483                 nb_rx += nb_dd;
1484
1485                 /* Translate descriptor info to mbuf format */
1486                 for (j = 0; j < nb_dd; ++j) {
1487                         mb = rxep[j].mbuf;
1488                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1489                                   rxq->crc_len;
1490                         mb->data_len = pkt_len;
1491                         mb->pkt_len = pkt_len;
1492                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1493
1494                         /* convert descriptor fields to rte mbuf flags */
1495                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1496                                 vlan_flags);
1497                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1498                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1499                                         ((uint16_t)pkt_info[j]);
1500                         mb->ol_flags = pkt_flags;
1501                         mb->packet_type =
1502                                 ixgbe_rxd_pkt_info_to_pkt_type
1503                                         (pkt_info[j], rxq->pkt_type_mask);
1504
1505                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1506                                 mb->hash.rss = rte_le_to_cpu_32(
1507                                     rxdp[j].wb.lower.hi_dword.rss);
1508                         else if (pkt_flags & PKT_RX_FDIR) {
1509                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1510                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1511                                     IXGBE_ATR_HASH_MASK;
1512                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1513                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1514                         }
1515                 }
1516
1517                 /* Move mbuf pointers from the S/W ring to the stage */
1518                 for (j = 0; j < LOOK_AHEAD; ++j) {
1519                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1520                 }
1521
1522                 /* stop if this group of descriptors was not fully completed */
1523                 if (nb_dd != LOOK_AHEAD)
1524                         break;
1525         }
1526
1527         /* clear software ring entries so we can cleanup correctly */
1528         for (i = 0; i < nb_rx; ++i) {
1529                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1530         }
1531
1532
1533         return nb_rx;
1534 }
1535
1536 static inline int
1537 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1538 {
1539         volatile union ixgbe_adv_rx_desc *rxdp;
1540         struct ixgbe_rx_entry *rxep;
1541         struct rte_mbuf *mb;
1542         uint16_t alloc_idx;
1543         __le64 dma_addr;
1544         int diag, i;
1545
1546         /* allocate buffers in bulk directly into the S/W ring */
1547         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1548         rxep = &rxq->sw_ring[alloc_idx];
1549         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1550                                     rxq->rx_free_thresh);
1551         if (unlikely(diag != 0))
1552                 return -ENOMEM;
1553
1554         rxdp = &rxq->rx_ring[alloc_idx];
1555         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1556                 /* populate the static rte mbuf fields */
1557                 mb = rxep[i].mbuf;
1558                 if (reset_mbuf) {
1559                         mb->next = NULL;
1560                         mb->nb_segs = 1;
1561                         mb->port = rxq->port_id;
1562                 }
1563
1564                 rte_mbuf_refcnt_set(mb, 1);
1565                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1566
1567                 /* populate the descriptors */
1568                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1569                 rxdp[i].read.hdr_addr = 0;
1570                 rxdp[i].read.pkt_addr = dma_addr;
1571         }
1572
1573         /* update state of internal queue structure */
1574         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1575         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1576                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
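        /*
         * Illustrative numbers only: with nb_rx_desc = 128 and
         * rx_free_thresh = 32, rx_free_trigger advances 31 -> 63 -> 95 -> 127
         * and then wraps back to 31, so each refill covers exactly one
         * rx_free_thresh-sized block of the ring.
         */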
1577
1578         /* no errors */
1579         return 0;
1580 }
1581
1582 static inline uint16_t
1583 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1584                          uint16_t nb_pkts)
1585 {
1586         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1587         int i;
1588
1589         /* how many packets are ready to return? */
1590         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1591
1592         /* copy mbuf pointers to the application's packet list */
1593         for (i = 0; i < nb_pkts; ++i)
1594                 rx_pkts[i] = stage[i];
1595
1596         /* update internal queue state */
1597         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1598         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1599
1600         return nb_pkts;
1601 }
1602
1603 static inline uint16_t
1604 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1605              uint16_t nb_pkts)
1606 {
1607         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1608         uint16_t nb_rx = 0;
1609
1610         /* Any previously recv'd pkts will be returned from the Rx stage */
1611         if (rxq->rx_nb_avail)
1612                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1613
1614         /* Scan the H/W ring for packets to receive */
1615         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1616
1617         /* update internal queue state */
1618         rxq->rx_next_avail = 0;
1619         rxq->rx_nb_avail = nb_rx;
1620         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1621
1622         /* if required, allocate new buffers to replenish descriptors */
1623         if (rxq->rx_tail > rxq->rx_free_trigger) {
1624                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1625
1626                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1627                         int i, j;
1628
1629                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1630                                    "queue_id=%u", (unsigned) rxq->port_id,
1631                                    (unsigned) rxq->queue_id);
1632
1633                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1634                                 rxq->rx_free_thresh;
1635
1636                         /*
1637                          * Need to rewind any previous receives if we cannot
1638                          * allocate new buffers to replenish the old ones.
1639                          */
1640                         rxq->rx_nb_avail = 0;
1641                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1642                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1643                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1644
1645                         return 0;
1646                 }
1647
1648                 /* update tail pointer */
1649                 rte_wmb();
1650                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1651                                             cur_free_trigger);
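                /*
                 * The value written is cur_free_trigger, captured before
                 * ixgbe_rx_alloc_bufs() advanced rx_free_trigger, so the NIC
                 * is only handed descriptors that have just been refilled
                 * with fresh buffers.
                 */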
1652         }
1653
1654         if (rxq->rx_tail >= rxq->nb_rx_desc)
1655                 rxq->rx_tail = 0;
1656
1657         /* received any packets this loop? */
1658         if (rxq->rx_nb_avail)
1659                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1660
1661         return 0;
1662 }
1663
1664 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1665 uint16_t
1666 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1667                            uint16_t nb_pkts)
1668 {
1669         uint16_t nb_rx;
1670
1671         if (unlikely(nb_pkts == 0))
1672                 return 0;
1673
1674         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1675                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1676
1677         /* request is relatively large, chunk it up */
1678         nb_rx = 0;
1679         while (nb_pkts) {
1680                 uint16_t ret, n;
1681
1682                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1683                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1684                 nb_rx = (uint16_t)(nb_rx + ret);
1685                 nb_pkts = (uint16_t)(nb_pkts - ret);
1686                 if (ret < n)
1687                         break;
1688         }
1689
1690         return nb_rx;
1691 }
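/*
 * For example, assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32: a request for 100
 * packets is serviced as chunks of 32, 32, 32 and 4, and the loop above stops
 * early as soon as a chunk returns fewer packets than were requested.
 */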
1692
1693 uint16_t
1694 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1695                 uint16_t nb_pkts)
1696 {
1697         struct ixgbe_rx_queue *rxq;
1698         volatile union ixgbe_adv_rx_desc *rx_ring;
1699         volatile union ixgbe_adv_rx_desc *rxdp;
1700         struct ixgbe_rx_entry *sw_ring;
1701         struct ixgbe_rx_entry *rxe;
1702         struct rte_mbuf *rxm;
1703         struct rte_mbuf *nmb;
1704         union ixgbe_adv_rx_desc rxd;
1705         uint64_t dma_addr;
1706         uint32_t staterr;
1707         uint32_t pkt_info;
1708         uint16_t pkt_len;
1709         uint16_t rx_id;
1710         uint16_t nb_rx;
1711         uint16_t nb_hold;
1712         uint64_t pkt_flags;
1713         uint64_t vlan_flags;
1714
1715         nb_rx = 0;
1716         nb_hold = 0;
1717         rxq = rx_queue;
1718         rx_id = rxq->rx_tail;
1719         rx_ring = rxq->rx_ring;
1720         sw_ring = rxq->sw_ring;
1721         vlan_flags = rxq->vlan_flags;
1722         while (nb_rx < nb_pkts) {
1723                 /*
1724                  * The order of operations here is important as the DD status
1725                  * bit must not be read after any other descriptor fields.
1726                  * rx_ring and rxdp are pointing to volatile data so the order
1727                  * of accesses cannot be reordered by the compiler. If they were
1728                  * not volatile, they could be reordered which could lead to
1729                  * using invalid descriptor fields when read from rxd.
1730                  */
1731                 rxdp = &rx_ring[rx_id];
1732                 staterr = rxdp->wb.upper.status_error;
1733                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1734                         break;
1735                 rxd = *rxdp;
1736
1737                 /*
1738                  * End of packet.
1739                  *
1740                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1741                  * is likely to be invalid and to be dropped by the various
1742                  * validation checks performed by the network stack.
1743                  *
1744                  * Allocate a new mbuf to replenish the RX ring descriptor.
1745                  * If the allocation fails:
1746                  *    - arrange for that RX descriptor to be the first one
1747                  *      being parsed the next time the receive function is
1748                  *      invoked [on the same queue].
1749                  *
1750                  *    - Stop parsing the RX ring and return immediately.
1751                  *
1752                  * This policy does not drop the packet received in the RX
1753                  * descriptor for which the allocation of a new mbuf failed.
1754                  * Thus, it allows that packet to be retrieved later, once
1755                  * mbufs have been freed in the meantime.
1756                  * As a side effect, holding RX descriptors instead of
1757                  * systematically giving them back to the NIC may lead to
1758                  * RX ring exhaustion situations.
1759                  * However, the NIC can gracefully prevent such situations
1760                  * from happening by sending specific "back-pressure" flow
1761                  * control frames to its peer(s).
1762                  */
1763                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1764                            "ext_err_stat=0x%08x pkt_len=%u",
1765                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1766                            (unsigned) rx_id, (unsigned) staterr,
1767                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1768
1769                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1770                 if (nmb == NULL) {
1771                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1772                                    "queue_id=%u", (unsigned) rxq->port_id,
1773                                    (unsigned) rxq->queue_id);
1774                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1775                         break;
1776                 }
1777
1778                 nb_hold++;
1779                 rxe = &sw_ring[rx_id];
1780                 rx_id++;
1781                 if (rx_id == rxq->nb_rx_desc)
1782                         rx_id = 0;
1783
1784                 /* Prefetch next mbuf while processing current one. */
1785                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1786
1787                 /*
1788                  * When next RX descriptor is on a cache-line boundary,
1789                  * prefetch the next 4 RX descriptors and the next 8 pointers
1790                  * to mbufs.
1791                  */
1792                 if ((rx_id & 0x3) == 0) {
1793                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1794                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1795                 }
1796
1797                 rxm = rxe->mbuf;
1798                 rxe->mbuf = nmb;
1799                 dma_addr =
1800                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1801                 rxdp->read.hdr_addr = 0;
1802                 rxdp->read.pkt_addr = dma_addr;
1803
1804                 /*
1805                  * Initialize the returned mbuf.
1806                  * 1) setup generic mbuf fields:
1807                  *    - number of segments,
1808                  *    - next segment,
1809                  *    - packet length,
1810                  *    - RX port identifier.
1811                  * 2) integrate hardware offload data, if any:
1812                  *    - RSS flag & hash,
1813                  *    - IP checksum flag,
1814                  *    - VLAN TCI, if any,
1815                  *    - error flags.
1816                  */
1817                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1818                                       rxq->crc_len);
1819                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1820                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1821                 rxm->nb_segs = 1;
1822                 rxm->next = NULL;
1823                 rxm->pkt_len = pkt_len;
1824                 rxm->data_len = pkt_len;
1825                 rxm->port = rxq->port_id;
1826
1827                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1828                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1829                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1830
1831                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1832                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1833                 pkt_flags = pkt_flags |
1834                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1835                 rxm->ol_flags = pkt_flags;
1836                 rxm->packet_type =
1837                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1838                                                        rxq->pkt_type_mask);
1839
1840                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1841                         rxm->hash.rss = rte_le_to_cpu_32(
1842                                                 rxd.wb.lower.hi_dword.rss);
1843                 else if (pkt_flags & PKT_RX_FDIR) {
1844                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1845                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1846                                         IXGBE_ATR_HASH_MASK;
1847                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1848                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1849                 }
1850                 /*
1851                  * Store the mbuf address into the next entry of the array
1852                  * of returned packets.
1853                  */
1854                 rx_pkts[nb_rx++] = rxm;
1855         }
1856         rxq->rx_tail = rx_id;
1857
1858         /*
1859          * If the number of free RX descriptors is greater than the RX free
1860          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1861          * register.
1862          * Update the RDT with the value of the last processed RX descriptor
1863          * minus 1, to guarantee that the RDT register is never equal to the
1864          * RDH register, which creates a "full" ring situation from the
1865          * hardware point of view...
1866          */
1867         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1868         if (nb_hold > rxq->rx_free_thresh) {
1869                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1870                            "nb_hold=%u nb_rx=%u",
1871                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1872                            (unsigned) rx_id, (unsigned) nb_hold,
1873                            (unsigned) nb_rx);
1874                 rx_id = (uint16_t) ((rx_id == 0) ?
1875                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
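                /*
                 * E.g. with a 128-descriptor ring, if rx_id has just wrapped
                 * to 0 the RDT is written as 127, i.e. one behind the next
                 * descriptor to be processed, which guarantees RDT never
                 * equals RDH.
                 */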
1876                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1877                 nb_hold = 0;
1878         }
1879         rxq->nb_rx_hold = nb_hold;
1880         return nb_rx;
1881 }
1882
1883 /**
1884  * Detect an RSC descriptor.
1885  */
1886 static inline uint32_t
1887 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1888 {
1889         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1890                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1891 }
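/*
 * A non-zero RSC count means hardware receive-side coalescing is aggregating
 * this packet; in that case the next segment is located via the descriptor's
 * NEXTP field rather than the following ring entry (see the !eop handling in
 * ixgbe_recv_pkts_lro() below).
 */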
1892
1893 /**
1894  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1895  *
1896  * Fill the following info in the HEAD buffer of the Rx cluster:
1897  *    - RX port identifier
1898  *    - hardware offload data, if any:
1899  *      - RSS flag & hash
1900  *      - IP checksum flag
1901  *      - VLAN TCI, if any
1902  *      - error flags
1903  * @head HEAD of the packet cluster
1904  * @desc HW descriptor to get data from
1905  * @rxq Pointer to the Rx queue
1906  */
1907 static inline void
1908 ixgbe_fill_cluster_head_buf(
1909         struct rte_mbuf *head,
1910         union ixgbe_adv_rx_desc *desc,
1911         struct ixgbe_rx_queue *rxq,
1912         uint32_t staterr)
1913 {
1914         uint32_t pkt_info;
1915         uint64_t pkt_flags;
1916
1917         head->port = rxq->port_id;
1918
1919         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1920          * set in the pkt_flags field.
1921          */
1922         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1923         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1924         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1925         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1926         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1927         head->ol_flags = pkt_flags;
1928         head->packet_type =
1929                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1930
1931         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1932                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1933         else if (pkt_flags & PKT_RX_FDIR) {
1934                 head->hash.fdir.hash =
1935                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1936                                                           & IXGBE_ATR_HASH_MASK;
1937                 head->hash.fdir.id =
1938                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1939         }
1940 }
1941
1942 /**
1943  * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1944  *
1945  * @rx_queue Rx queue handle
1946  * @rx_pkts table of received packets
1947  * @nb_pkts size of rx_pkts table
1948  * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1949  *
1950  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1951  * additional ring of ixgbe_scattered_rx_entry's that holds the relevant RSC info.
1952  *
1953  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1954  * 1) When non-EOP RSC completion arrives:
1955  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1956  *       segment's data length.
1957  *    b) Set the "next" pointer of the current segment to point to the segment
1958  *       at the NEXTP index.
1959  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1960  *       in the sw_sc_ring.
1961  * 2) When EOP arrives we just update the cluster's total length and offload
1962  *    flags and deliver the cluster up to the upper layers. In our case - put it
1963  *    in the rx_pkts table.
1964  *
1965  * Returns the number of received packets/clusters (according to the "bulk
1966  * receive" interface).
1967  */
1968 static inline uint16_t
1969 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1970                     bool bulk_alloc)
1971 {
1972         struct ixgbe_rx_queue *rxq = rx_queue;
1973         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
1974         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
1975         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
1976         uint16_t rx_id = rxq->rx_tail;
1977         uint16_t nb_rx = 0;
1978         uint16_t nb_hold = rxq->nb_rx_hold;
1979         uint16_t prev_id = rxq->rx_tail;
1980
1981         while (nb_rx < nb_pkts) {
1982                 bool eop;
1983                 struct ixgbe_rx_entry *rxe;
1984                 struct ixgbe_scattered_rx_entry *sc_entry;
1985                 struct ixgbe_scattered_rx_entry *next_sc_entry;
1986                 struct ixgbe_rx_entry *next_rxe = NULL;
1987                 struct rte_mbuf *first_seg;
1988                 struct rte_mbuf *rxm;
1989                 struct rte_mbuf *nmb;
1990                 union ixgbe_adv_rx_desc rxd;
1991                 uint16_t data_len;
1992                 uint16_t next_id;
1993                 volatile union ixgbe_adv_rx_desc *rxdp;
1994                 uint32_t staterr;
1995
1996 next_desc:
1997                 /*
1998                  * The code in this whole file uses the volatile pointer to
1999                  * ensure the read ordering of the status and the rest of the
2000                  * descriptor fields (on the compiler level only!!!). This is so
2001                  * UGLY - why not just use the compiler barrier instead? DPDK
2002                  * even has the rte_compiler_barrier() for that.
2003                  *
2004                  * But most importantly this is just wrong because this doesn't
2005                  * ensure memory ordering in a general case at all. For
2006                  * instance, DPDK is supposed to work on Power CPUs where
2007                  * compiler barrier may just not be enough!
2008                  *
2009                  * I tried to write only this function properly to have a
2010                  * starting point (as a part of an LRO/RSC series) but the
2011                  * compiler cursed at me when I tried to cast away the
2012                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2013                  * keeping it the way it is for now.
2014                  *
2015                  * The code in this file is broken in so many other places and
2016                  * will just not work on a big endian CPU anyway therefore the
2017                  * lines below will have to be revisited together with the rest
2018                  * of the ixgbe PMD.
2019                  *
2020                  * TODO:
2021                  *    - Get rid of "volatile" crap and let the compiler do its
2022                  *      job.
2023                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2024                  *      memory ordering below.
2025                  */
2026                 rxdp = &rx_ring[rx_id];
2027                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2028
2029                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2030                         break;
2031
2032                 rxd = *rxdp;
2033
2034                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2035                                   "staterr=0x%x data_len=%u",
2036                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2037                            rte_le_to_cpu_16(rxd.wb.upper.length));
2038
2039                 if (!bulk_alloc) {
2040                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2041                         if (nmb == NULL) {
2042                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2043                                                   "port_id=%u queue_id=%u",
2044                                            rxq->port_id, rxq->queue_id);
2045
2046                                 rte_eth_devices[rxq->port_id].data->
2047                                                         rx_mbuf_alloc_failed++;
2048                                 break;
2049                         }
2050                 } else if (nb_hold > rxq->rx_free_thresh) {
2051                         uint16_t next_rdt = rxq->rx_free_trigger;
2052
2053                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2054                                 rte_wmb();
2055                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2056                                                             next_rdt);
2057                                 nb_hold -= rxq->rx_free_thresh;
2058                         } else {
2059                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2060                                                   "port_id=%u queue_id=%u",
2061                                            rxq->port_id, rxq->queue_id);
2062
2063                                 rte_eth_devices[rxq->port_id].data->
2064                                                         rx_mbuf_alloc_failed++;
2065                                 break;
2066                         }
2067                 }
2068
2069                 nb_hold++;
2070                 rxe = &sw_ring[rx_id];
2071                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2072
2073                 next_id = rx_id + 1;
2074                 if (next_id == rxq->nb_rx_desc)
2075                         next_id = 0;
2076
2077                 /* Prefetch next mbuf while processing current one. */
2078                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2079
2080                 /*
2081                  * When next RX descriptor is on a cache-line boundary,
2082                  * prefetch the next 4 RX descriptors and the next 8 pointers
2083                  * to mbufs.
2084                  */
2085                 if ((next_id & 0x3) == 0) {
2086                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2087                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2088                 }
2089
2090                 rxm = rxe->mbuf;
2091
2092                 if (!bulk_alloc) {
2093                         __le64 dma =
2094                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2095                         /*
2096                          * Update RX descriptor with the physical address of the
2097                          * new data buffer of the newly allocated mbuf.
2098                          */
2099                         rxe->mbuf = nmb;
2100
2101                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2102                         rxdp->read.hdr_addr = 0;
2103                         rxdp->read.pkt_addr = dma;
2104                 } else
2105                         rxe->mbuf = NULL;
2106
2107                 /*
2108                  * Set data length & data buffer address of mbuf.
2109                  */
2110                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2111                 rxm->data_len = data_len;
2112
2113                 if (!eop) {
2114                         uint16_t nextp_id;
2115                         /*
2116                          * Get next descriptor index:
2117                          *  - For RSC it's in the NEXTP field.
2118                          *  - For a scattered packet - it's just a following
2119                          *    descriptor.
2120                          */
2121                         if (ixgbe_rsc_count(&rxd))
2122                                 nextp_id =
2123                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2124                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2125                         else
2126                                 nextp_id = next_id;
2127
2128                         next_sc_entry = &sw_sc_ring[nextp_id];
2129                         next_rxe = &sw_ring[nextp_id];
2130                         rte_ixgbe_prefetch(next_rxe);
2131                 }
2132
2133                 sc_entry = &sw_sc_ring[rx_id];
2134                 first_seg = sc_entry->fbuf;
2135                 sc_entry->fbuf = NULL;
2136
2137                 /*
2138                  * If this is the first buffer of the received packet,
2139                  * set the pointer to the first mbuf of the packet and
2140                  * initialize its context.
2141                  * Otherwise, update the total length and the number of segments
2142                  * of the current scattered packet, and update the pointer to
2143                  * the last mbuf of the current packet.
2144                  */
2145                 if (first_seg == NULL) {
2146                         first_seg = rxm;
2147                         first_seg->pkt_len = data_len;
2148                         first_seg->nb_segs = 1;
2149                 } else {
2150                         first_seg->pkt_len += data_len;
2151                         first_seg->nb_segs++;
2152                 }
2153
2154                 prev_id = rx_id;
2155                 rx_id = next_id;
2156
2157                 /*
2158                  * If this is not the last buffer of the received packet, update
2159                  * the pointer to the first mbuf at the NEXTP entry in the
2160                  * sw_sc_ring and continue to parse the RX ring.
2161                  */
2162                 if (!eop && next_rxe) {
2163                         rxm->next = next_rxe->mbuf;
2164                         next_sc_entry->fbuf = first_seg;
2165                         goto next_desc;
2166                 }
2167
2168                 /*
2169                  * This is the last buffer of the received packet - return
2170                  * the current cluster to the user.
2171                  */
2172                 rxm->next = NULL;
2173
2174                 /* Initialize the first mbuf of the returned packet */
2175                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2176
2177                 /*
2178                  * Deal with the case when HW CRC strip is disabled.
2179                  * That can't happen when LRO is enabled, but it still can
2180                  * happen in scattered RX mode.
2181                  */
2182                 first_seg->pkt_len -= rxq->crc_len;
2183                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2184                         struct rte_mbuf *lp;
2185
2186                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2187                                 ;
2188
2189                         first_seg->nb_segs--;
2190                         lp->data_len -= rxq->crc_len - rxm->data_len;
2191                         lp->next = NULL;
2192                         rte_pktmbuf_free_seg(rxm);
2193                 } else
2194                         rxm->data_len -= rxq->crc_len;
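                /*
                 * Illustrative numbers only: with crc_len = 4 and a last
                 * segment holding just 2 bytes, the whole last mbuf is freed
                 * and the remaining 2 CRC bytes are trimmed from the previous
                 * segment instead.
                 */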
2195
2196                 /* Prefetch data of first segment, if configured to do so. */
2197                 rte_packet_prefetch((char *)first_seg->buf_addr +
2198                         first_seg->data_off);
2199
2200                 /*
2201                  * Store the mbuf address into the next entry of the array
2202                  * of returned packets.
2203                  */
2204                 rx_pkts[nb_rx++] = first_seg;
2205         }
2206
2207         /*
2208          * Record index of the next RX descriptor to probe.
2209          */
2210         rxq->rx_tail = rx_id;
2211
2212         /*
2213          * If the number of free RX descriptors is greater than the RX free
2214          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2215          * register.
2216          * Update the RDT with the value of the last processed RX descriptor
2217          * minus 1, to guarantee that the RDT register is never equal to the
2218          * RDH register, which creates a "full" ring situation from the
2219          * hardware point of view...
2220          */
2221         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2222                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2223                            "nb_hold=%u nb_rx=%u",
2224                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2225
2226                 rte_wmb();
2227                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2228                 nb_hold = 0;
2229         }
2230
2231         rxq->nb_rx_hold = nb_hold;
2232         return nb_rx;
2233 }
2234
2235 uint16_t
2236 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2237                                  uint16_t nb_pkts)
2238 {
2239         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2240 }
2241
2242 uint16_t
2243 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2244                                uint16_t nb_pkts)
2245 {
2246         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2247 }
2248
2249 /*********************************************************************
2250  *
2251  *  Queue management functions
2252  *
2253  **********************************************************************/
2254
2255 static void __attribute__((cold))
2256 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2257 {
2258         unsigned i;
2259
2260         if (txq->sw_ring != NULL) {
2261                 for (i = 0; i < txq->nb_tx_desc; i++) {
2262                         if (txq->sw_ring[i].mbuf != NULL) {
2263                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2264                                 txq->sw_ring[i].mbuf = NULL;
2265                         }
2266                 }
2267         }
2268 }
2269
2270 static void __attribute__((cold))
2271 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2272 {
2273         if (txq != NULL &&
2274             txq->sw_ring != NULL)
2275                 rte_free(txq->sw_ring);
2276 }
2277
2278 static void __attribute__((cold))
2279 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2280 {
2281         if (txq != NULL && txq->ops != NULL) {
2282                 txq->ops->release_mbufs(txq);
2283                 txq->ops->free_swring(txq);
2284                 rte_free(txq);
2285         }
2286 }
2287
2288 void __attribute__((cold))
2289 ixgbe_dev_tx_queue_release(void *txq)
2290 {
2291         ixgbe_tx_queue_release(txq);
2292 }
2293
2294 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2295 static void __attribute__((cold))
2296 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2297 {
2298         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2299         struct ixgbe_tx_entry *txe = txq->sw_ring;
2300         uint16_t prev, i;
2301
2302         /* Zero out HW ring memory */
2303         for (i = 0; i < txq->nb_tx_desc; i++) {
2304                 txq->tx_ring[i] = zeroed_desc;
2305         }
2306
2307         /* Initialize SW ring entries */
2308         prev = (uint16_t) (txq->nb_tx_desc - 1);
2309         for (i = 0; i < txq->nb_tx_desc; i++) {
2310                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2311
2312                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2313                 txe[i].mbuf = NULL;
2314                 txe[i].last_id = i;
2315                 txe[prev].next_id = i;
2316                 prev = i;
2317         }
2318
2319         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2320         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2321
2322         txq->tx_tail = 0;
2323         txq->nb_tx_used = 0;
2324         /*
2325          * Always allow 1 descriptor to be un-allocated to avoid
2326          * a H/W race condition
2327          */
2328         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2329         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2330         txq->ctx_curr = 0;
2331         memset((void *)&txq->ctx_cache, 0,
2332                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2333 }
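/*
 * After the reset above, every descriptor reports DD (done), all sw_ring mbuf
 * pointers are NULL, and the entries are linked into a circular list via
 * next_id/last_id; nb_tx_free is nb_tx_desc - 1, leaving one descriptor
 * un-allocated as noted in the comment above.
 */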
2334
2335 static const struct ixgbe_txq_ops def_txq_ops = {
2336         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2337         .free_swring = ixgbe_tx_free_swring,
2338         .reset = ixgbe_reset_tx_queue,
2339 };
2340
2341 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2342  * the queue parameters. Used in tx_queue_setup by primary process and then
2343  * in dev_init by secondary process when attaching to an existing ethdev.
2344  */
2345 void __attribute__((cold))
2346 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2347 {
2348         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2349         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2350                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2351                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2352                 dev->tx_pkt_prepare = NULL;
2353 #ifdef RTE_IXGBE_INC_VECTOR
2354                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2355                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2356                                         ixgbe_txq_vec_setup(txq) == 0)) {
2357                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2358                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2359                 } else
2360 #endif
2361                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2362         } else {
2363                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2364                 PMD_INIT_LOG(DEBUG,
2365                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2366                                 (unsigned long)txq->txq_flags,
2367                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2368                 PMD_INIT_LOG(DEBUG,
2369                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2370                                 (unsigned long)txq->tx_rs_thresh,
2371                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2372                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2373                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2374         }
2375 }
2376
2377 int __attribute__((cold))
2378 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2379                          uint16_t queue_idx,
2380                          uint16_t nb_desc,
2381                          unsigned int socket_id,
2382                          const struct rte_eth_txconf *tx_conf)
2383 {
2384         const struct rte_memzone *tz;
2385         struct ixgbe_tx_queue *txq;
2386         struct ixgbe_hw     *hw;
2387         uint16_t tx_rs_thresh, tx_free_thresh;
2388
2389         PMD_INIT_FUNC_TRACE();
2390         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2391
2392         /*
2393          * Validate number of transmit descriptors.
2394          * It must not exceed the hardware maximum and must be a multiple
2395          * of IXGBE_TXD_ALIGN.
2396          */
2397         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2398                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2399                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2400                 return -EINVAL;
2401         }
2402
2403         /*
2404          * The following two parameters control the setting of the RS bit on
2405          * transmit descriptors.
2406          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2407          * descriptors have been used.
2408          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2409          * descriptors are used or if the number of descriptors required
2410          * to transmit a packet is greater than the number of free TX
2411          * descriptors.
2412          * The following constraints must be satisfied:
2413          *  tx_rs_thresh must be greater than 0.
2414          *  tx_rs_thresh must be less than the size of the ring minus 2.
2415          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2416          *  tx_rs_thresh must be a divisor of the ring size.
2417          *  tx_free_thresh must be greater than 0.
2418          *  tx_free_thresh must be less than the size of the ring minus 3.
2419          * One descriptor in the TX ring is used as a sentinel to avoid a
2420          * H/W race condition, hence the maximum threshold constraints.
2421          * When set to zero use default values.
2422          */
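        /*
         * Worked example (values are illustrative, not required): with
         * nb_desc = 512, tx_rs_thresh = 32 and tx_free_thresh = 32, the RS
         * bit is set on every 32nd descriptor, 512 % 32 == 0 satisfies the
         * divisor rule, tx_rs_thresh <= tx_free_thresh holds, and both stay
         * well below the (nb_desc - 2) and (nb_desc - 3) limits.
         */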
2423         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2424                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2425         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2426                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2427         if (tx_rs_thresh >= (nb_desc - 2)) {
2428                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2429                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2430                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2431                         (int)dev->data->port_id, (int)queue_idx);
2432                 return -(EINVAL);
2433         }
2434         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2435                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2436                         "(tx_rs_thresh=%u port=%d queue=%d)",
2437                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2438                         (int)dev->data->port_id, (int)queue_idx);
2439                 return -(EINVAL);
2440         }
2441         if (tx_free_thresh >= (nb_desc - 3)) {
2442                 PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2443                              "number of TX descriptors minus 3. "
2444                              "(tx_free_thresh=%u "
2445                              "port=%d queue=%d)",
2446                              (unsigned int)tx_free_thresh,
2447                              (int)dev->data->port_id, (int)queue_idx);
2448                 return -(EINVAL);
2449         }
2450         if (tx_rs_thresh > tx_free_thresh) {
2451                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2452                              "tx_free_thresh. (tx_free_thresh=%u "
2453                              "tx_rs_thresh=%u port=%d queue=%d)",
2454                              (unsigned int)tx_free_thresh,
2455                              (unsigned int)tx_rs_thresh,
2456                              (int)dev->data->port_id,
2457                              (int)queue_idx);
2458                 return -(EINVAL);
2459         }
2460         if ((nb_desc % tx_rs_thresh) != 0) {
2461                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2462                              "number of TX descriptors. (tx_rs_thresh=%u "
2463                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2464                              (int)dev->data->port_id, (int)queue_idx);
2465                 return -(EINVAL);
2466         }
2467
2468         /*
2469          * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2470          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2471          * by the NIC and all descriptors are written back after the NIC
2472          * accumulates WTHRESH descriptors.
2473          */
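        /*
         * E.g. a configuration with tx_rs_thresh = 32 must use
         * tx_thresh.wthresh = 0; write-back is then driven by the RS bit
         * set every tx_rs_thresh descriptors rather than by WTHRESH.
         */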
2474         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2475                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2476                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2477                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2478                              (int)dev->data->port_id, (int)queue_idx);
2479                 return -(EINVAL);
2480         }
2481
2482         /* Free memory prior to re-allocation if needed... */
2483         if (dev->data->tx_queues[queue_idx] != NULL) {
2484                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2485                 dev->data->tx_queues[queue_idx] = NULL;
2486         }
2487
2488         /* First allocate the tx queue data structure */
2489         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2490                                  RTE_CACHE_LINE_SIZE, socket_id);
2491         if (txq == NULL)
2492                 return -ENOMEM;
2493
2494         /*
2495          * Allocate TX ring hardware descriptors. A memzone large enough to
2496          * handle the maximum ring size is allocated in order to allow for
2497          * resizing in later calls to the queue setup function.
2498          */
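        /*
         * The zone is sized for IXGBE_MAX_RING_DESC descriptors regardless
         * of nb_desc, so a later setup call with a larger ring can reuse
         * the same memzone (roughly 64 KB per queue, assuming the usual
         * 4096-descriptor maximum and 16-byte advanced Tx descriptors).
         */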
2499         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2500                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2501                         IXGBE_ALIGN, socket_id);
2502         if (tz == NULL) {
2503                 ixgbe_tx_queue_release(txq);
2504                 return -ENOMEM;
2505         }
2506
2507         txq->nb_tx_desc = nb_desc;
2508         txq->tx_rs_thresh = tx_rs_thresh;
2509         txq->tx_free_thresh = tx_free_thresh;
2510         txq->pthresh = tx_conf->tx_thresh.pthresh;
2511         txq->hthresh = tx_conf->tx_thresh.hthresh;
2512         txq->wthresh = tx_conf->tx_thresh.wthresh;
2513         txq->queue_id = queue_idx;
2514         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2515                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2516         txq->port_id = dev->data->port_id;
2517         txq->txq_flags = tx_conf->txq_flags;
2518         txq->ops = &def_txq_ops;
2519         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2520
2521         /*
2522          * Use VFTDT as the Tx tail register if a virtual function (VF) is detected
2523          */
2524         if (hw->mac.type == ixgbe_mac_82599_vf ||
2525             hw->mac.type == ixgbe_mac_X540_vf ||
2526             hw->mac.type == ixgbe_mac_X550_vf ||
2527             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2528             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2529                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2530         else
2531                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2532
2533         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2534         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2535
2536         /* Allocate software ring */
2537         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2538                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2539                                 RTE_CACHE_LINE_SIZE, socket_id);
2540         if (txq->sw_ring == NULL) {
2541                 ixgbe_tx_queue_release(txq);
2542                 return -ENOMEM;
2543         }
2544         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2545                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2546
2547         /* set up vector or scalar TX function as appropriate */
2548         ixgbe_set_tx_function(dev, txq);
2549
2550         txq->ops->reset(txq);
2551
2552         dev->data->tx_queues[queue_idx] = txq;
2553
2554
2555         return 0;
2556 }
2557
2558 /**
2559  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2560  *
2561  * The "next" pointer of the last segment of a (not-yet-completed) RSC cluster
2562  * in the sw_sc_ring is not set to NULL but rather points to the next
2563  * mbuf of this RSC aggregation (which has not been completed yet and still
2564  * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2565  * just free the first "nb_segs" segments of the cluster explicitly by
2566  * calling rte_pktmbuf_free_seg() on each of them.
2567  *
2568  * @m scattered cluster head
2569  */
2570 static void __attribute__((cold))
2571 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2572 {
2573         uint8_t i, nb_segs = m->nb_segs;
2574         struct rte_mbuf *next_seg;
2575
2576         for (i = 0; i < nb_segs; i++) {
2577                 next_seg = m->next;
2578                 rte_pktmbuf_free_seg(m);
2579                 m = next_seg;
2580         }
2581 }
2582
2583 static void __attribute__((cold))
2584 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2585 {
2586         unsigned i;
2587
2588 #ifdef RTE_IXGBE_INC_VECTOR
2589         /* SSE Vector driver has a different way of releasing mbufs. */
2590         if (rxq->rx_using_sse) {
2591                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2592                 return;
2593         }
2594 #endif
2595
2596         if (rxq->sw_ring != NULL) {
2597                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2598                         if (rxq->sw_ring[i].mbuf != NULL) {
2599                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2600                                 rxq->sw_ring[i].mbuf = NULL;
2601                         }
2602                 }
2603                 if (rxq->rx_nb_avail) {
2604                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2605                                 struct rte_mbuf *mb;
2606
2607                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2608                                 rte_pktmbuf_free_seg(mb);
2609                         }
2610                         rxq->rx_nb_avail = 0;
2611                 }
2612         }
2613
2614         if (rxq->sw_sc_ring)
2615                 for (i = 0; i < rxq->nb_rx_desc; i++)
2616                         if (rxq->sw_sc_ring[i].fbuf) {
2617                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2618                                 rxq->sw_sc_ring[i].fbuf = NULL;
2619                         }
2620 }
2621
2622 static void __attribute__((cold))
2623 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2624 {
2625         if (rxq != NULL) {
2626                 ixgbe_rx_queue_release_mbufs(rxq);
2627                 rte_free(rxq->sw_ring);
2628                 rte_free(rxq->sw_sc_ring);
2629                 rte_free(rxq);
2630         }
2631 }
2632
2633 void __attribute__((cold))
2634 ixgbe_dev_rx_queue_release(void *rxq)
2635 {
2636         ixgbe_rx_queue_release(rxq);
2637 }
2638
2639 /*
2640  * Check if Rx Burst Bulk Alloc function can be used.
2641  * Return
2642  *        0: the preconditions are satisfied and the bulk allocation function
2643  *           can be used.
2644  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2645  *           function must be used.
2646  */
2647 static inline int __attribute__((cold))
2648 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2649 {
2650         int ret = 0;
2651
2652         /*
2653          * Make sure the following pre-conditions are satisfied:
2654          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2655          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2656          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2657          * Scattered packets are not supported.  This should be checked
2658          * outside of this function.
2659          */
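        /*
         * For example, nb_rx_desc = 512 with rx_free_thresh = 64 satisfies
         * all three conditions (assuming the usual RTE_PMD_IXGBE_RX_MAX_BURST
         * of 32), whereas rx_free_thresh = 48 would fail the divisor check.
         */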
2660         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2661                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2662                              "rxq->rx_free_thresh=%d, "
2663                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2664                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2665                 ret = -EINVAL;
2666         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2667                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2668                              "rxq->rx_free_thresh=%d, "
2669                              "rxq->nb_rx_desc=%d",
2670                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2671                 ret = -EINVAL;
2672         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2673                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2674                              "rxq->nb_rx_desc=%d, "
2675                              "rxq->rx_free_thresh=%d",
2676                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2677                 ret = -EINVAL;
2678         }
2679
2680         return ret;
2681 }
2682
2683 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2684 static void __attribute__((cold))
2685 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2686 {
2687         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2688         unsigned i;
2689         uint16_t len = rxq->nb_rx_desc;
2690
2691         /*
2692          * By default, the Rx queue setup function allocates enough memory for
2693          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2694          * extra memory at the end of the descriptor ring to be zero'd out.
2695          */
2696         if (adapter->rx_bulk_alloc_allowed)
2697                 /* zero out extra memory */
2698                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2699
2700         /*
2701          * Zero out HW ring memory. Zero out extra memory at the end of
2702          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2703          * reads extra memory as zeros.
2704          */
2705         for (i = 0; i < len; i++) {
2706                 rxq->rx_ring[i] = zeroed_desc;
2707         }
2708
2709         /*
2710          * initialize extra software ring entries. Space for these extra
2711          * entries is always allocated
2712          */
2713         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2714         for (i = rxq->nb_rx_desc; i < len; ++i) {
2715                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2716         }
2717
2718         rxq->rx_nb_avail = 0;
2719         rxq->rx_next_avail = 0;
2720         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2721         rxq->rx_tail = 0;
2722         rxq->nb_rx_hold = 0;
2723         rxq->pkt_first_seg = NULL;
2724         rxq->pkt_last_seg = NULL;
2725
2726 #ifdef RTE_IXGBE_INC_VECTOR
2727         rxq->rxrearm_start = 0;
2728         rxq->rxrearm_nb = 0;
2729 #endif
2730 }
2731
2732 int __attribute__((cold))
2733 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2734                          uint16_t queue_idx,
2735                          uint16_t nb_desc,
2736                          unsigned int socket_id,
2737                          const struct rte_eth_rxconf *rx_conf,
2738                          struct rte_mempool *mp)
2739 {
2740         const struct rte_memzone *rz;
2741         struct ixgbe_rx_queue *rxq;
2742         struct ixgbe_hw     *hw;
2743         uint16_t len;
2744         struct ixgbe_adapter *adapter =
2745                 (struct ixgbe_adapter *)dev->data->dev_private;
2746
2747         PMD_INIT_FUNC_TRACE();
2748         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2749
2750         /*
2751          * Validate number of receive descriptors.
2752          * It must not exceed the hardware maximum and must be a multiple
2753          * of IXGBE_RXD_ALIGN.
2754          */
2755         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2756                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2757                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2758                 return -EINVAL;
2759         }
2760
2761         /* Free memory prior to re-allocation if needed... */
2762         if (dev->data->rx_queues[queue_idx] != NULL) {
2763                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2764                 dev->data->rx_queues[queue_idx] = NULL;
2765         }
2766
2767         /* First allocate the rx queue data structure */
2768         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2769                                  RTE_CACHE_LINE_SIZE, socket_id);
2770         if (rxq == NULL)
2771                 return -ENOMEM;
2772         rxq->mb_pool = mp;
2773         rxq->nb_rx_desc = nb_desc;
2774         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2775         rxq->queue_id = queue_idx;
2776         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2777                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2778         rxq->port_id = dev->data->port_id;
2779         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2780                                                         0 : ETHER_CRC_LEN);
2781         rxq->drop_en = rx_conf->rx_drop_en;
2782         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2783
2784         /*
2785          * The packet type in RX descriptor is different for different NICs.
2786          * Some bits are used for X550 but reserved for other NICs.
2787          * So set different masks for different NICs.
2788          */
2789         if (hw->mac.type == ixgbe_mac_X550 ||
2790             hw->mac.type == ixgbe_mac_X550EM_x ||
2791             hw->mac.type == ixgbe_mac_X550EM_a ||
2792             hw->mac.type == ixgbe_mac_X550_vf ||
2793             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2794             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2795                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2796         else
2797                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2798
2799         /*
2800          * Allocate RX ring hardware descriptors. A memzone large enough to
2801          * handle the maximum ring size is allocated in order to allow for
2802          * resizing in later calls to the queue setup function.
2803          */
2804         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2805                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2806         if (rz == NULL) {
2807                 ixgbe_rx_queue_release(rxq);
2808                 return -ENOMEM;
2809         }
2810
2811         /*
2812          * Zero init all the descriptors in the ring.
2813          */
2814         memset(rz->addr, 0, RX_RING_SZ);
2815
2816         /*
2817          * Set up VFRDT and VFRDH for a virtual function (VF)
2818          */
2819         if (hw->mac.type == ixgbe_mac_82599_vf ||
2820             hw->mac.type == ixgbe_mac_X540_vf ||
2821             hw->mac.type == ixgbe_mac_X550_vf ||
2822             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2823             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2824                 rxq->rdt_reg_addr =
2825                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2826                 rxq->rdh_reg_addr =
2827                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2828         } else {
2829                 rxq->rdt_reg_addr =
2830                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2831                 rxq->rdh_reg_addr =
2832                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2833         }
2834
2835         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2836         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2837
2838         /*
2839          * Certain constraints must be met in order to use the bulk buffer
2840          * allocation Rx burst function. If any Rx queue doesn't meet them,
2841          * the feature should be disabled for the whole port.
2842          */
2843         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2844                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2845                                     "preconditions - canceling the feature for "
2846                                     "the whole port[%d]",
2847                              rxq->queue_id, rxq->port_id);
2848                 adapter->rx_bulk_alloc_allowed = false;
2849         }
2850
2851         /*
2852          * Allocate software ring. Allow for space at the end of the
2853          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2854          * function does not access an invalid memory region.
2855          */
2856         len = nb_desc;
2857         if (adapter->rx_bulk_alloc_allowed)
2858                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2859
2860         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2861                                           sizeof(struct ixgbe_rx_entry) * len,
2862                                           RTE_CACHE_LINE_SIZE, socket_id);
2863         if (!rxq->sw_ring) {
2864                 ixgbe_rx_queue_release(rxq);
2865                 return -ENOMEM;
2866         }
2867
2868         /*
2869          * Always allocate even if it's not going to be needed in order to
2870          * simplify the code.
2871          *
2872          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2873          * be requested in ixgbe_dev_rx_init(), which is called later from
2874          * dev_start() flow.
2875          */
2876         rxq->sw_sc_ring =
2877                 rte_zmalloc_socket("rxq->sw_sc_ring",
2878                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2879                                    RTE_CACHE_LINE_SIZE, socket_id);
2880         if (!rxq->sw_sc_ring) {
2881                 ixgbe_rx_queue_release(rxq);
2882                 return -ENOMEM;
2883         }
2884
2885         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2886                             "dma_addr=0x%"PRIx64,
2887                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2888                      rxq->rx_ring_phys_addr);
2889
2890         if (!rte_is_power_of_2(nb_desc)) {
2891                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2892                                     "preconditions - canceling the feature for "
2893                                     "the whole port[%d]",
2894                              rxq->queue_id, rxq->port_id);
2895                 adapter->rx_vec_allowed = false;
2896         } else
2897                 ixgbe_rxq_vec_setup(rxq);
2898
2899         dev->data->rx_queues[queue_idx] = rxq;
2900
2901         ixgbe_reset_rx_queue(adapter, rxq);
2902
2903         return 0;
2904 }
2905
2906 uint32_t
2907 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2908 {
2909 #define IXGBE_RXQ_SCAN_INTERVAL 4
2910         volatile union ixgbe_adv_rx_desc *rxdp;
2911         struct ixgbe_rx_queue *rxq;
2912         uint32_t desc = 0;
2913
2914         rxq = dev->data->rx_queues[rx_queue_id];
2915         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2916
2917         while ((desc < rxq->nb_rx_desc) &&
2918                 (rxdp->wb.upper.status_error &
2919                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2920                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2921                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2922                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2923                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2924                                 desc - rxq->nb_rx_desc]);
2925         }
2926
2927         return desc;
2928 }
2929
2930 int
2931 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2932 {
2933         volatile union ixgbe_adv_rx_desc *rxdp;
2934         struct ixgbe_rx_queue *rxq = rx_queue;
2935         uint32_t desc;
2936
2937         if (unlikely(offset >= rxq->nb_rx_desc))
2938                 return 0;
2939         desc = rxq->rx_tail + offset;
2940         if (desc >= rxq->nb_rx_desc)
2941                 desc -= rxq->nb_rx_desc;
2942
2943         rxdp = &rxq->rx_ring[desc];
2944         return !!(rxdp->wb.upper.status_error &
2945                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2946 }
2947
2948 int
2949 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2950 {
2951         struct ixgbe_rx_queue *rxq = rx_queue;
2952         volatile uint32_t *status;
2953         uint32_t nb_hold, desc;
2954
2955         if (unlikely(offset >= rxq->nb_rx_desc))
2956                 return -EINVAL;
2957
2958 #ifdef RTE_IXGBE_INC_VECTOR
2959         if (rxq->rx_using_sse)
2960                 nb_hold = rxq->rxrearm_nb;
2961         else
2962 #endif
2963                 nb_hold = rxq->nb_rx_hold;
2964         if (offset >= rxq->nb_rx_desc - nb_hold)
2965                 return RTE_ETH_RX_DESC_UNAVAIL;
2966
2967         desc = rxq->rx_tail + offset;
2968         if (desc >= rxq->nb_rx_desc)
2969                 desc -= rxq->nb_rx_desc;
2970
2971         status = &rxq->rx_ring[desc].wb.upper.status_error;
2972         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
2973                 return RTE_ETH_RX_DESC_DONE;
2974
2975         return RTE_ETH_RX_DESC_AVAIL;
2976 }
2977
2978 int
2979 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
2980 {
2981         struct ixgbe_tx_queue *txq = tx_queue;
2982         volatile uint32_t *status;
2983         uint32_t desc;
2984
2985         if (unlikely(offset >= txq->nb_tx_desc))
2986                 return -EINVAL;
2987
2988         desc = txq->tx_tail + offset;
2989         /* go to next desc that has the RS bit */
2990         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
2991                 txq->tx_rs_thresh;
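        /*
         * E.g. with tx_rs_thresh = 32, an offset that lands on descriptor 40
         * is rounded up to descriptor 64, the next descriptor carrying an RS
         * bit and therefore a DD write-back.
         */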
2992         if (desc >= txq->nb_tx_desc) {
2993                 desc -= txq->nb_tx_desc;
2994                 if (desc >= txq->nb_tx_desc)
2995                         desc -= txq->nb_tx_desc;
2996         }
2997
2998         status = &txq->tx_ring[desc].wb.status;
2999         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3000                 return RTE_ETH_TX_DESC_DONE;
3001
3002         return RTE_ETH_TX_DESC_FULL;
3003 }
3004
3005 void __attribute__((cold))
3006 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3007 {
3008         unsigned i;
3009         struct ixgbe_adapter *adapter =
3010                 (struct ixgbe_adapter *)dev->data->dev_private;
3011
3012         PMD_INIT_FUNC_TRACE();
3013
3014         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3015                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3016
3017                 if (txq != NULL) {
3018                         txq->ops->release_mbufs(txq);
3019                         txq->ops->reset(txq);
3020                 }
3021         }
3022
3023         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3024                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3025
3026                 if (rxq != NULL) {
3027                         ixgbe_rx_queue_release_mbufs(rxq);
3028                         ixgbe_reset_rx_queue(adapter, rxq);
3029                 }
3030         }
3031 }
3032
3033 void
3034 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3035 {
3036         unsigned i;
3037
3038         PMD_INIT_FUNC_TRACE();
3039
3040         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3041                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3042                 dev->data->rx_queues[i] = NULL;
3043         }
3044         dev->data->nb_rx_queues = 0;
3045
3046         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3047                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3048                 dev->data->tx_queues[i] = NULL;
3049         }
3050         dev->data->nb_tx_queues = 0;
3051 }
3052
3053 /*********************************************************************
3054  *
3055  *  Device RX/TX init functions
3056  *
3057  **********************************************************************/
3058
3059 /**
3060  * Receive Side Scaling (RSS)
3061  * See section 7.1.2.8 in the following document:
3062  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3063  *
3064  * Principles:
3065  * The source and destination IP addresses of the IP header and the source
3066  * and destination ports of TCP/UDP headers, if any, of received packets are
3067  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3068  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3069  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3070  * RSS output index, which is used as the index of the RX queue in which the
3071  * received packets are stored.
3072  * The following output is supplied in the RX write-back descriptor:
3073  *     - 32-bit result of the Microsoft RSS hash function,
3074  *     - 4-bit RSS type field.
3075  */
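/*
 * Roughly, the hardware queue selection described above is:
 *
 *     hash  = rss_hash(key, ip_src, ip_dst, l4_src, l4_dst);  (32-bit result)
 *     queue = RETA[hash & 0x7F];                              (7 LSBs, 128 entries)
 *
 * (illustrative pseudo-code only; the lookup is performed entirely in hardware)
 */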
3076
3077 /*
3078  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3079  * Used as the default key.
3080  */
3081 static uint8_t rss_intel_key[40] = {
3082         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3083         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3084         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3085         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3086         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3087 };
3088
3089 static void
3090 ixgbe_rss_disable(struct rte_eth_dev *dev)
3091 {
3092         struct ixgbe_hw *hw;
3093         uint32_t mrqc;
3094         uint32_t mrqc_reg;
3095
3096         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3097         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3098         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3099         mrqc &= ~IXGBE_MRQC_RSSEN;
3100         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3101 }
3102
3103 static void
3104 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3105 {
3106         uint8_t  *hash_key;
3107         uint32_t mrqc;
3108         uint32_t rss_key;
3109         uint64_t rss_hf;
3110         uint16_t i;
3111         uint32_t mrqc_reg;
3112         uint32_t rssrk_reg;
3113
3114         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3115         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3116
3117         hash_key = rss_conf->rss_key;
3118         if (hash_key != NULL) {
3119                 /* Fill in RSS hash key */
3120                 for (i = 0; i < 10; i++) {
3121                         rss_key  = hash_key[(i * 4)];
3122                         rss_key |= hash_key[(i * 4) + 1] << 8;
3123                         rss_key |= hash_key[(i * 4) + 2] << 16;
3124                         rss_key |= hash_key[(i * 4) + 3] << 24;
3125                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3126                 }
3127         }
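        /*
         * The 40-byte key is packed into ten 32-bit words (first byte of
         * each group in the least-significant bits) and written to
         * consecutive RSSRK registers.
         */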
3128
3129         /* Set configured hashing protocols in MRQC register */
3130         rss_hf = rss_conf->rss_hf;
3131         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3132         if (rss_hf & ETH_RSS_IPV4)
3133                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3134         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3135                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3136         if (rss_hf & ETH_RSS_IPV6)
3137                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3138         if (rss_hf & ETH_RSS_IPV6_EX)
3139                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3140         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3141                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3142         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3143                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3144         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3145                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3146         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3147                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3148         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3149                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3150         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3151 }
3152
3153 int
3154 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3155                           struct rte_eth_rss_conf *rss_conf)
3156 {
3157         struct ixgbe_hw *hw;
3158         uint32_t mrqc;
3159         uint64_t rss_hf;
3160         uint32_t mrqc_reg;
3161
3162         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3163
3164         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3165                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3166                         "NIC.");
3167                 return -ENOTSUP;
3168         }
3169         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3170
3171         /*
3172          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3173          *     "RSS enabling cannot be done dynamically while it must be
3174          *      preceded by a software reset"
3175          * Before changing anything, first check that the RSS update does not
3176          * attempt to disable RSS if RSS was enabled at initialization time,
3177          * and does not attempt to enable RSS if RSS was disabled at
3178          * initialization time.
3179          */
3180         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3181         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3182         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3183                 if (rss_hf != 0) /* Enable RSS */
3184                         return -(EINVAL);
3185                 return 0; /* Nothing to do */
3186         }
3187         /* RSS enabled */
3188         if (rss_hf == 0) /* Disable RSS */
3189                 return -(EINVAL);
3190         ixgbe_hw_rss_hash_set(hw, rss_conf);
3191         return 0;
3192 }
3193
3194 int
3195 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3196                             struct rte_eth_rss_conf *rss_conf)
3197 {
3198         struct ixgbe_hw *hw;
3199         uint8_t *hash_key;
3200         uint32_t mrqc;
3201         uint32_t rss_key;
3202         uint64_t rss_hf;
3203         uint16_t i;
3204         uint32_t mrqc_reg;
3205         uint32_t rssrk_reg;
3206
3207         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3208         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3209         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3210         hash_key = rss_conf->rss_key;
3211         if (hash_key != NULL) {
3212                 /* Return RSS hash key */
3213                 for (i = 0; i < 10; i++) {
3214                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3215                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3216                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3217                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3218                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3219                 }
3220         }
3221
3222         /* Get RSS functions configured in MRQC register */
3223         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3224         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3225                 rss_conf->rss_hf = 0;
3226                 return 0;
3227         }
3228         rss_hf = 0;
3229         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3230                 rss_hf |= ETH_RSS_IPV4;
3231         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3232                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3233         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3234                 rss_hf |= ETH_RSS_IPV6;
3235         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3236                 rss_hf |= ETH_RSS_IPV6_EX;
3237         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3238                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3239         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3240                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3241         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3242                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3243         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3244                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3245         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3246                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3247         rss_conf->rss_hf = rss_hf;
3248         return 0;
3249 }
3250
3251 static void
3252 ixgbe_rss_configure(struct rte_eth_dev *dev)
3253 {
3254         struct rte_eth_rss_conf rss_conf;
3255         struct ixgbe_hw *hw;
3256         uint32_t reta;
3257         uint16_t i;
3258         uint16_t j;
3259         uint16_t sp_reta_size;
3260         uint32_t reta_reg;
3261
3262         PMD_INIT_FUNC_TRACE();
3263         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3264
3265         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3266
3267         /*
3268          * Fill in redirection table
3269          * The byte-swap is needed because NIC registers are in
3270          * little-endian order.
3271          */
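        /*
         * Example with 4 Rx queues: entries cycle 0,1,2,3,0,1,... and every
         * fourth iteration ((i & 3) == 3) flushes four packed 8-bit entries
         * into one 32-bit RETA register.
         */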
3272         reta = 0;
3273         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3274                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3275
3276                 if (j == dev->data->nb_rx_queues)
3277                         j = 0;
3278                 reta = (reta << 8) | j;
3279                 if ((i & 3) == 3)
3280                         IXGBE_WRITE_REG(hw, reta_reg,
3281                                         rte_bswap32(reta));
3282         }
3283
3284         /*
3285          * Configure the RSS key and the RSS protocols used to compute
3286          * the RSS hash of input packets.
3287          */
3288         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3289         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3290                 ixgbe_rss_disable(dev);
3291                 return;
3292         }
3293         if (rss_conf.rss_key == NULL)
3294                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3295         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3296 }
3297
3298 #define NUM_VFTA_REGISTERS 128
3299 #define NIC_RX_BUFFER_SIZE 0x200
3300 #define X550_RX_BUFFER_SIZE 0x180
3301
3302 static void
3303 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3304 {
3305         struct rte_eth_vmdq_dcb_conf *cfg;
3306         struct ixgbe_hw *hw;
3307         enum rte_eth_nb_pools num_pools;
3308         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3309         uint16_t pbsize;
3310         uint8_t nb_tcs; /* number of traffic classes */
3311         int i;
3312
3313         PMD_INIT_FUNC_TRACE();
3314         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3315         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3316         num_pools = cfg->nb_queue_pools;
3317         /* Check we have a valid number of pools */
3318         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3319                 ixgbe_rss_disable(dev);
3320                 return;
3321         }
3322         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3323         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3324
3325         /*
3326          * RXPBSIZE
3327          * split rx buffer up into sections, each for 1 traffic class
3328          */
3329         switch (hw->mac.type) {
3330         case ixgbe_mac_X550:
3331         case ixgbe_mac_X550EM_x:
3332         case ixgbe_mac_X550EM_a:
3333                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3334                 break;
3335         default:
3336                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3337                 break;
3338         }
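        /*
         * E.g. on a non-X550 device with 8 TCs this gives
         * pbsize = 0x200 / 8 = 0x40 per traffic class.
         */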
3339         for (i = 0; i < nb_tcs; i++) {
3340                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3341
3342                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3343                 /* clear 10 bits. */
3344                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3345                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3346         }
3347         /* zero alloc all unused TCs */
3348         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3349                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3350
3351                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3352                 /* clear 10 bits. */
3353                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3354         }
3355
3356         /* MRQC: enable vmdq and dcb */
3357         mrqc = (num_pools == ETH_16_POOLS) ?
3358                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3359         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3360
3361         /* PFVTCTL: turn on virtualisation and set the default pool */
3362         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3363         if (cfg->enable_default_pool) {
3364                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3365         } else {
3366                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3367         }
3368
3369         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3370
3371         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3372         queue_mapping = 0;
3373         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3374                 /*
3375                  * mapping is done with 3 bits per priority,
3376                  * so shift by i*3 each time
3377                  */
3378                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
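        /*
         * E.g. an identity mapping dcb_tc = {0,1,2,3,4,5,6,7} yields
         * queue_mapping = 0xFAC688 (priority i occupies bits [3i+2:3i]).
         */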
3379
3380         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3381
3382         /* RTRPCS: DCB related */
3383         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3384
3385         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3386         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3387         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3388         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3389
3390         /* VFTA - enable all vlan filters */
3391         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3392                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3393         }
3394
3395         /* VFRE: pool enabling for receive - 16 or 32 */
3396         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3397                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3398
3399         /*
3400          * MPSAR - allow pools to read specific mac addresses
3401          * In this case, all pools should be able to read from mac addr 0
3402          */
3403         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3404         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3405
3406         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3407         for (i = 0; i < cfg->nb_pool_maps; i++) {
3408                 /* set vlan id in VF register and set the valid bit */
3409                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3410                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3411                 /*
3412                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3413                  * pools, we only need to use the first half of the register
3414                  * i.e. bits 0-31
3415                  */
3416                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3417         }
3418 }
3419
3420 /**
3421  * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3422  * @dev: pointer to eth_dev structure
3423  * @dcb_config: pointer to ixgbe_dcb_config structure
3424  */
3425 static void
3426 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3427                        struct ixgbe_dcb_config *dcb_config)
3428 {
3429         uint32_t reg;
3430         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3431
3432         PMD_INIT_FUNC_TRACE();
3433         if (hw->mac.type != ixgbe_mac_82598EB) {
3434                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3435                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3436                 reg |= IXGBE_RTTDCS_ARBDIS;
3437                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3438
3439                 /* Enable DCB for Tx with 8 TCs */
3440                 if (dcb_config->num_tcs.pg_tcs == 8) {
3441                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3442                 } else {
3443                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3444                 }
3445                 if (dcb_config->vt_mode)
3446                         reg |= IXGBE_MTQC_VT_ENA;
3447                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3448
3449                 /* Enable the Tx desc arbiter */
3450                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3451                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3452                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3453
3454                 /* Enable Security TX Buffer IFG for DCB */
3455                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3456                 reg |= IXGBE_SECTX_DCB;
3457                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3458         }
3459 }
3460
3461 /**
3462  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3463  * @dev: pointer to rte_eth_dev structure
3464  * @dcb_config: pointer to ixgbe_dcb_config structure
3465  */
3466 static void
3467 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3468                         struct ixgbe_dcb_config *dcb_config)
3469 {
3470         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3471                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3472         struct ixgbe_hw *hw =
3473                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3474
3475         PMD_INIT_FUNC_TRACE();
3476         if (hw->mac.type != ixgbe_mac_82598EB)
3477                 /*PF VF Transmit Enable*/
3478                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3479                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3480
3481         /*Configure general DCB TX parameters*/
3482         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3483 }
3484
3485 static void
3486 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3487                         struct ixgbe_dcb_config *dcb_config)
3488 {
3489         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3490                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3491         struct ixgbe_dcb_tc_config *tc;
3492         uint8_t i, j;
3493
3494         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3495         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3496                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3497                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3498         } else {
3499                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3500                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3501         }
3502         /* User Priority to Traffic Class mapping */
3503         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3504                 j = vmdq_rx_conf->dcb_tc[i];
3505                 tc = &dcb_config->tc_config[j];
3506                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3507                                                 (uint8_t)(1 << j);
3508         }
3509 }
3510
3511 static void
3512 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3513                         struct ixgbe_dcb_config *dcb_config)
3514 {
3515         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3516                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3517         struct ixgbe_dcb_tc_config *tc;
3518         uint8_t i, j;
3519
3520         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3521         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3522                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3523                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3524         } else {
3525                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3526                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3527         }
3528
3529         /* User Priority to Traffic Class mapping */
3530         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3531                 j = vmdq_tx_conf->dcb_tc[i];
3532                 tc = &dcb_config->tc_config[j];
3533                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3534                                                 (uint8_t)(1 << j);
3535         }
3536 }
3537
3538 static void
3539 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3540                 struct ixgbe_dcb_config *dcb_config)
3541 {
3542         struct rte_eth_dcb_rx_conf *rx_conf =
3543                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3544         struct ixgbe_dcb_tc_config *tc;
3545         uint8_t i, j;
3546
3547         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3548         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3549
3550         /* User Priority to Traffic Class mapping */
3551         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3552                 j = rx_conf->dcb_tc[i];
3553                 tc = &dcb_config->tc_config[j];
3554                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3555                                                 (uint8_t)(1 << j);
3556         }
3557 }
3558
3559 static void
3560 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3561                 struct ixgbe_dcb_config *dcb_config)
3562 {
3563         struct rte_eth_dcb_tx_conf *tx_conf =
3564                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3565         struct ixgbe_dcb_tc_config *tc;
3566         uint8_t i, j;
3567
3568         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3569         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3570
3571         /* User Priority to Traffic Class mapping */
3572         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3573                 j = tx_conf->dcb_tc[i];
3574                 tc = &dcb_config->tc_config[j];
3575                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3576                                                 (uint8_t)(1 << j);
3577         }
3578 }
3579
3580 /**
3581  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3582  * @dev: pointer to eth_dev structure
3583  * @dcb_config: pointer to ixgbe_dcb_config structure
3584  */
3585 static void
3586 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3587                        struct ixgbe_dcb_config *dcb_config)
3588 {
3589         uint32_t reg;
3590         uint32_t vlanctrl;
3591         uint8_t i;
3592         uint32_t q;
3593         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3594
3595         PMD_INIT_FUNC_TRACE();
3596         /*
3597          * Disable the arbiter before changing parameters
3598          * (always enable recycle mode; WSP)
3599          */
3600         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3601         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3602
3603         if (hw->mac.type != ixgbe_mac_82598EB) {
3604                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3605                 if (dcb_config->num_tcs.pg_tcs == 4) {
3606                         if (dcb_config->vt_mode)
3607                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3608                                         IXGBE_MRQC_VMDQRT4TCEN;
3609                         else {
3610                                 /* Whether the mode is DCB or DCB_RSS, just
3611                                  * set the MRQE to RSSXTCEN; RSS is controlled
3612                                  * by RSS_FIELD.
3613                                  */
3614                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3615                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3616                                         IXGBE_MRQC_RTRSS4TCEN;
3617                         }
3618                 }
3619                 if (dcb_config->num_tcs.pg_tcs == 8) {
3620                         if (dcb_config->vt_mode)
3621                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3622                                         IXGBE_MRQC_VMDQRT8TCEN;
3623                         else {
3624                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3625                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3626                                         IXGBE_MRQC_RTRSS8TCEN;
3627                         }
3628                 }
3629
3630                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3631
3632                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3633                         /* Disable drop for all queues in VMDQ mode*/
3634                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3635                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3636                                                 (IXGBE_QDE_WRITE |
3637                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3638                 } else {
3639                         /* Enable drop for all queues in SRIOV mode */
3640                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3641                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3642                                                 (IXGBE_QDE_WRITE |
3643                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3644                                                  IXGBE_QDE_ENABLE));
3645                 }
3646         }
3647
3648         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3649         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3650         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3651         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3652
3653         /* VFTA - enable all vlan filters */
3654         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3655                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3656         }
3657
3658         /*
3659          * Configure Rx packet plane (recycle mode; WSP) and
3660          * enable arbiter
3661          */
3662         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3663         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3664 }
3665
3666 static void
3667 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3668                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3669 {
3670         switch (hw->mac.type) {
3671         case ixgbe_mac_82598EB:
3672                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3673                 break;
3674         case ixgbe_mac_82599EB:
3675         case ixgbe_mac_X540:
3676         case ixgbe_mac_X550:
3677         case ixgbe_mac_X550EM_x:
3678         case ixgbe_mac_X550EM_a:
3679                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3680                                                   tsa, map);
3681                 break;
3682         default:
3683                 break;
3684         }
3685 }
3686
3687 static void
3688 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3689                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3690 {
3691         switch (hw->mac.type) {
3692         case ixgbe_mac_82598EB:
3693                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3694                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3695                 break;
3696         case ixgbe_mac_82599EB:
3697         case ixgbe_mac_X540:
3698         case ixgbe_mac_X550:
3699         case ixgbe_mac_X550EM_x:
3700         case ixgbe_mac_X550EM_a:
3701                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3702                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3703                 break;
3704         default:
3705                 break;
3706         }
3707 }
3708
3709 #define DCB_RX_CONFIG  1
3710 #define DCB_TX_CONFIG  1
3711 #define DCB_TX_PB      1024
3712 /**
3713  * ixgbe_dcb_hw_configure - Enable DCB and configure
3714  * general DCB in VT mode and non-VT mode parameters
3715  * @dev: pointer to rte_eth_dev structure
3716  * @dcb_config: pointer to ixgbe_dcb_config structure
3717  */
3718 static int
3719 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3720                         struct ixgbe_dcb_config *dcb_config)
3721 {
3722         int     ret = 0;
3723         uint8_t i, pfc_en, nb_tcs;
3724         uint16_t pbsize, rx_buffer_size;
3725         uint8_t config_dcb_rx = 0;
3726         uint8_t config_dcb_tx = 0;
3727         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3728         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3729         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3730         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3731         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3732         struct ixgbe_dcb_tc_config *tc;
3733         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3734         struct ixgbe_hw *hw =
3735                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3736
3737         switch (dev->data->dev_conf.rxmode.mq_mode) {
3738         case ETH_MQ_RX_VMDQ_DCB:
3739                 dcb_config->vt_mode = true;
3740                 if (hw->mac.type != ixgbe_mac_82598EB) {
3741                         config_dcb_rx = DCB_RX_CONFIG;
3742                         /*
3743                          * Get DCB and VT Rx configuration parameters
3744                          * from rte_eth_conf.
3745                          */
3746                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3747                         /* Configure general VMDQ and DCB RX parameters */
3748                         ixgbe_vmdq_dcb_configure(dev);
3749                 }
3750                 break;
3751         case ETH_MQ_RX_DCB:
3752         case ETH_MQ_RX_DCB_RSS:
3753                 dcb_config->vt_mode = false;
3754                 config_dcb_rx = DCB_RX_CONFIG;
3755                 /* Get DCB RX configuration parameters from rte_eth_conf */
3756                 ixgbe_dcb_rx_config(dev, dcb_config);
3757                 /* Configure general DCB RX parameters */
3758                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3759                 break;
3760         default:
3761                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3762                 break;
3763         }
3764         switch (dev->data->dev_conf.txmode.mq_mode) {
3765         case ETH_MQ_TX_VMDQ_DCB:
3766                 dcb_config->vt_mode = true;
3767                 config_dcb_tx = DCB_TX_CONFIG;
3768                 /* get DCB and VT TX configuration parameters
3769                  * from rte_eth_conf
3770                  */
3771                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3772                 /* Configure general VMDQ and DCB TX parameters */
3773                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3774                 break;
3775
3776         case ETH_MQ_TX_DCB:
3777                 dcb_config->vt_mode = false;
3778                 config_dcb_tx = DCB_TX_CONFIG;
3779                 /* Get DCB TX configuration parameters from rte_eth_conf */
3780                 ixgbe_dcb_tx_config(dev, dcb_config);
3781                 /* Configure general DCB TX parameters */
3782                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3783                 break;
3784         default:
3785                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3786                 break;
3787         }
3788
3789         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3790         /* Unpack map */
3791         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3792         if (nb_tcs == ETH_4_TCS) {
3793                 /* Avoid un-configured priority mapping to TC0 */
3794                 uint8_t j = 4;
3795                 uint8_t mask = 0xFF;
3796
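                     /*
                      * The first loop below clears a mask bit for each TC that
                      * priorities 0-3 already use; the second assigns the
                      * leftover TCs to priorities 4-7 so that un-configured
                      * priorities do not all default to TC0.
                      */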
3797                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3798                         mask = (uint8_t)(mask & (~(1 << map[i])));
3799                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3800                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3801                                 map[j++] = i;
3802                         mask >>= 1;
3803                 }
3804                 /* Re-configure 4 TCs BW */
3805                 for (i = 0; i < nb_tcs; i++) {
3806                         tc = &dcb_config->tc_config[i];
3807                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3808                                                 (uint8_t)(100 / nb_tcs);
3809                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3810                                                 (uint8_t)(100 / nb_tcs);
3811                 }
3812                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3813                         tc = &dcb_config->tc_config[i];
3814                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3815                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3816                 }
3817         } else {
3818                 /* Re-configure 8 TCs BW */
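                     /*
                      * With nb_tcs == 8, 100 / nb_tcs truncates to 12, so the
                      * (i & 1) term alternates 12%/13% shares and the eight
                      * shares sum to exactly 100 (4 * 12 + 4 * 13).
                      */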
3819                 for (i = 0; i < nb_tcs; i++) {
3820                         tc = &dcb_config->tc_config[i];
3821                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3822                                 (uint8_t)(100 / nb_tcs + (i & 1));
3823                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3824                                 (uint8_t)(100 / nb_tcs + (i & 1));
3825                 }
3826         }
3827
3828         switch (hw->mac.type) {
3829         case ixgbe_mac_X550:
3830         case ixgbe_mac_X550EM_x:
3831         case ixgbe_mac_X550EM_a:
3832                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3833                 break;
3834         default:
3835                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3836                 break;
3837         }
3838
3839         if (config_dcb_rx) {
3840                 /* Set RX buffer size */
3841                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3842                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3843
3844                 for (i = 0; i < nb_tcs; i++) {
3845                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3846                 }
3847                 /* zero alloc all unused TCs */
3848                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3849                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3850                 }
3851         }
3852         if (config_dcb_tx) {
3853                 /* Only an equally distributed Tx packet buffer
3854                  * strategy is supported.
3855                  */
3856                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3857                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
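                     /*
                      * Note: txpktsize / DCB_TX_PB converts the per-TC buffer
                      * size from bytes to KB; the threshold is then set
                      * IXGBE_TXPKT_SIZE_MAX KB below that.
                      */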
3858
3859                 for (i = 0; i < nb_tcs; i++) {
3860                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3861                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3862                 }
3863                 /* Clear unused TCs, if any, to zero buffer size */
3864                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3865                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3866                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3867                 }
3868         }
3869
3870         /* Calculate traffic class credits */
3871         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3872                                 IXGBE_DCB_TX_CONFIG);
3873         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3874                                 IXGBE_DCB_RX_CONFIG);
3875
3876         if (config_dcb_rx) {
3877                 /* Unpack CEE standard containers */
3878                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3879                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3880                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3881                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3882                 /* Configure PG(ETS) RX */
3883                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3884         }
3885
3886         if (config_dcb_tx) {
3887                 /* Unpack CEE standard containers */
3888                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3889                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3890                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3891                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3892                 /* Configure PG(ETS) TX */
3893                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3894         }
3895
3896         /* Configure queue statistics registers */
3897         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3898
3899         /* Check if the PFC is supported */
3900         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3901                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3902                 for (i = 0; i < nb_tcs; i++) {
3903                         /*
3904                          * If the TC count is 8 and the default high_water is 48,
3905                          * then the default low_water is 16.
3906                          */
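                             /*
                              * E.g. with pbsize = 64 (KB), high_water = 64 * 3 / 4
                              * = 48 and low_water = 64 / 4 = 16, matching the
                              * defaults mentioned above.
                              */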
3907                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3908                         hw->fc.low_water[i] = pbsize / 4;
3909                         /* Enable pfc for this TC */
3910                         tc = &dcb_config->tc_config[i];
3911                         tc->pfc = ixgbe_dcb_pfc_enabled;
3912                 }
3913                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3914                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3915                         pfc_en &= 0x0F;
3916                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3917         }
3918
3919         return ret;
3920 }
3921
3922 /**
3923  * ixgbe_configure_dcb - Configure DCB  Hardware
3924  * @dev: pointer to rte_eth_dev
3925  */
3926 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3927 {
3928         struct ixgbe_dcb_config *dcb_cfg =
3929                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3930         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3931
3932         PMD_INIT_FUNC_TRACE();
3933
3934         /* check support mq_mode for DCB */
3935         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3936             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3937             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3938                 return;
3939
3940         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3941                 return;
3942
3943         /* Configure DCB hardware */
3944         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3945 }
3946
3947 /*
3948  * VMDq is only supported on 10 GbE NICs.
3949  */
3950 static void
3951 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3952 {
3953         struct rte_eth_vmdq_rx_conf *cfg;
3954         struct ixgbe_hw *hw;
3955         enum rte_eth_nb_pools num_pools;
3956         uint32_t mrqc, vt_ctl, vlanctrl;
3957         uint32_t vmolr = 0;
3958         int i;
3959
3960         PMD_INIT_FUNC_TRACE();
3961         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3962         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3963         num_pools = cfg->nb_queue_pools;
3964
3965         ixgbe_rss_disable(dev);
3966
3967         /* MRQC: enable vmdq */
3968         mrqc = IXGBE_MRQC_VMDQEN;
3969         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3970
3971         /* PFVTCTL: turn on virtualisation and set the default pool */
3972         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3973         if (cfg->enable_default_pool)
3974                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3975         else
3976                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3977
3978         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3979
3980         for (i = 0; i < (int)num_pools; i++) {
3981                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
3982                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
3983         }
3984
3985         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3986         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3987         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3988         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3989
3990         /* VFTA - enable all vlan filters */
3991         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
3992                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
3993
3994         /* VFRE: pool enabling for receive - 64 */
3995         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
3996         if (num_pools == ETH_64_POOLS)
3997                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
3998
3999         /*
4000          * MPSAR - allow pools to read specific mac addresses
4001          * In this case, all pools should be able to read from mac addr 0
4002          */
4003         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4004         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4005
4006         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4007         for (i = 0; i < cfg->nb_pool_maps; i++) {
4008                 /* set vlan id in VF register and set the valid bit */
4009                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4010                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4011                 /*
4012                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4013                  * pools, we only need to use the first half of the register
4014                  * i.e. bits 0-31
4015                  */
4016                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4017                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4018                                         (cfg->pool_map[i].pools & UINT32_MAX));
4019                 else
4020                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4021                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4022
4023         }
4024
4025         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4026         if (cfg->enable_loop_back) {
4027                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4028                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4029                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4030         }
4031
4032         IXGBE_WRITE_FLUSH(hw);
4033 }
4034
4035 /*
4036  * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4037  * @hw: pointer to hardware structure
4038  */
4039 static void
4040 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4041 {
4042         uint32_t reg;
4043         uint32_t q;
4044
4045         PMD_INIT_FUNC_TRACE();
4046         /* PF VF Transmit Enable */
4047         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4048         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4049
4050         /* Disable the Tx desc arbiter so that MTQC can be changed */
4051         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4052         reg |= IXGBE_RTTDCS_ARBDIS;
4053         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4054
4055         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4056         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4057
4058         /* Disable drop for all queues */
4059         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4060                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4061                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4062
4063         /* Enable the Tx desc arbiter */
4064         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4065         reg &= ~IXGBE_RTTDCS_ARBDIS;
4066         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4067
4068         IXGBE_WRITE_FLUSH(hw);
4069 }
4070
4071 static int __attribute__((cold))
4072 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4073 {
4074         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4075         uint64_t dma_addr;
4076         unsigned int i;
4077
4078         /* Initialize software ring entries */
4079         for (i = 0; i < rxq->nb_rx_desc; i++) {
4080                 volatile union ixgbe_adv_rx_desc *rxd;
4081                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4082
4083                 if (mbuf == NULL) {
4084                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4085                                      (unsigned) rxq->queue_id);
4086                         return -ENOMEM;
4087                 }
4088
4089                 rte_mbuf_refcnt_set(mbuf, 1);
4090                 mbuf->next = NULL;
4091                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4092                 mbuf->nb_segs = 1;
4093                 mbuf->port = rxq->port_id;
4094
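                     /*
                      * Fill the advanced Rx descriptor in read format: header
                      * split is not used, so only the packet buffer address is
                      * programmed.
                      */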
4095                 dma_addr =
4096                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4097                 rxd = &rxq->rx_ring[i];
4098                 rxd->read.hdr_addr = 0;
4099                 rxd->read.pkt_addr = dma_addr;
4100                 rxe[i].mbuf = mbuf;
4101         }
4102
4103         return 0;
4104 }
4105
4106 static int
4107 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4108 {
4109         struct ixgbe_hw *hw;
4110         uint32_t mrqc;
4111
4112         ixgbe_rss_configure(dev);
4113
4114         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4115
4116         /* MRQC: enable VF RSS */
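             /*
              * ixgbe_rss_configure() above has already programmed the RSS hash
              * key and redirection table; here only the MRQC filtering mode is
              * switched to the VMDq+RSS variant matching the active pool count.
              */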
4117         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4118         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4119         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4120         case ETH_64_POOLS:
4121                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4122                 break;
4123
4124         case ETH_32_POOLS:
4125                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4126                 break;
4127
4128         default:
4129                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4130                 return -EINVAL;
4131         }
4132
4133         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4134
4135         return 0;
4136 }
4137
4138 static int
4139 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4140 {
4141         struct ixgbe_hw *hw =
4142                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4143
4144         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4145         case ETH_64_POOLS:
4146                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4147                         IXGBE_MRQC_VMDQEN);
4148                 break;
4149
4150         case ETH_32_POOLS:
4151                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4152                         IXGBE_MRQC_VMDQRT4TCEN);
4153                 break;
4154
4155         case ETH_16_POOLS:
4156                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4157                         IXGBE_MRQC_VMDQRT8TCEN);
4158                 break;
4159         default:
4160                 PMD_INIT_LOG(ERR,
4161                         "invalid pool number in IOV mode");
4162                 break;
4163         }
4164         return 0;
4165 }
4166
4167 static int
4168 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4169 {
4170         struct ixgbe_hw *hw =
4171                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4172
4173         if (hw->mac.type == ixgbe_mac_82598EB)
4174                 return 0;
4175
4176         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4177                 /*
4178                  * SRIOV inactive scheme
4179                  * any DCB/RSS w/o VMDq multi-queue setting
4180                  */
4181                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4182                 case ETH_MQ_RX_RSS:
4183                 case ETH_MQ_RX_DCB_RSS:
4184                 case ETH_MQ_RX_VMDQ_RSS:
4185                         ixgbe_rss_configure(dev);
4186                         break;
4187
4188                 case ETH_MQ_RX_VMDQ_DCB:
4189                         ixgbe_vmdq_dcb_configure(dev);
4190                         break;
4191
4192                 case ETH_MQ_RX_VMDQ_ONLY:
4193                         ixgbe_vmdq_rx_hw_configure(dev);
4194                         break;
4195
4196                 case ETH_MQ_RX_NONE:
4197                 default:
4198                         /* If mq_mode is none, disable RSS mode. */
4199                         ixgbe_rss_disable(dev);
4200                         break;
4201                 }
4202         } else {
4203                 /* SRIOV active scheme
4204                  * Support RSS together with SRIOV.
4205                  */
4206                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4207                 case ETH_MQ_RX_RSS:
4208                 case ETH_MQ_RX_VMDQ_RSS:
4209                         ixgbe_config_vf_rss(dev);
4210                         break;
4211                 case ETH_MQ_RX_VMDQ_DCB:
4212                 case ETH_MQ_RX_DCB:
4213                 /* In SRIOV, the configuration is the same as VMDq case */
4214                         ixgbe_vmdq_dcb_configure(dev);
4215                         break;
4216                 /* DCB/RSS together with SRIOV is not supported */
4217                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4218                 case ETH_MQ_RX_DCB_RSS:
4219                         PMD_INIT_LOG(ERR,
4220                                 "Could not support DCB/RSS with VMDq & SRIOV");
4221                         return -1;
4222                 default:
4223                         ixgbe_config_vf_default(dev);
4224                         break;
4225                 }
4226         }
4227
4228         return 0;
4229 }
4230
4231 static int
4232 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4233 {
4234         struct ixgbe_hw *hw =
4235                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4236         uint32_t mtqc;
4237         uint32_t rttdcs;
4238
4239         if (hw->mac.type == ixgbe_mac_82598EB)
4240                 return 0;
4241
4242         /* disable arbiter before setting MTQC */
4243         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4244         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4245         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4246
4247         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4248                 /*
4249                  * SRIOV inactive scheme
4250                  * any DCB w/o VMDq multi-queue setting
4251                  */
4252                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4253                         ixgbe_vmdq_tx_hw_configure(hw);
4254                 else {
4255                         mtqc = IXGBE_MTQC_64Q_1PB;
4256                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4257                 }
4258         } else {
4259                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4260
4261                 /*
4262                  * SRIOV active scheme
4263                  * FIXME if support DCB together with VMDq & SRIOV
4264                  */
4265                 case ETH_64_POOLS:
4266                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4267                         break;
4268                 case ETH_32_POOLS:
4269                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4270                         break;
4271                 case ETH_16_POOLS:
4272                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4273                                 IXGBE_MTQC_8TC_8TQ;
4274                         break;
4275                 default:
4276                         mtqc = IXGBE_MTQC_64Q_1PB;
4277                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4278                 }
4279                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4280         }
4281
4282         /* re-enable arbiter */
4283         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4284         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4285
4286         return 0;
4287 }
4288
4289 /**
4290  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4291  *
4292  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4293  * spec rev. 3.0 chapter 8.2.3.8.13.
4294  *
4295  * @pool Memory pool of the Rx queue
4296  */
4297 static inline uint32_t
4298 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4299 {
4300         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4301
4302         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4303         uint16_t maxdesc =
4304                 IPV4_MAX_PKT_LEN /
4305                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4306
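             /*
              * E.g. with a typical 2 KB mbuf data area (after headroom) maxdesc
              * works out to about 31, so MAXDESC_16 is selected and
              * 16 * 2 KB stays well below the 64 KB limit.
              */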
4307         if (maxdesc >= 16)
4308                 return IXGBE_RSCCTL_MAXDESC_16;
4309         else if (maxdesc >= 8)
4310                 return IXGBE_RSCCTL_MAXDESC_8;
4311         else if (maxdesc >= 4)
4312                 return IXGBE_RSCCTL_MAXDESC_4;
4313         else
4314                 return IXGBE_RSCCTL_MAXDESC_1;
4315 }
4316
4317 /**
4318  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4319  * interrupt
4320  *
4321  * (Taken from FreeBSD tree)
4322  * (yes this is all very magic and confusing :)
4323  *
4324  * @dev port handle
4325  * @entry the register array entry
4326  * @vector the MSIX vector for this queue
4327  * @type RX/TX/MISC
4328  */
4329 static void
4330 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4331 {
4332         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4333         u32 ivar, index;
4334
4335         vector |= IXGBE_IVAR_ALLOC_VAL;
4336
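             /*
              * Each 32-bit IVAR register packs several 8-bit vector entries:
              * four entries per register on 82598, and two queue slots (an Rx
              * byte and a Tx byte each) per register on 82599/X540, which is
              * what the index arithmetic below encodes.
              */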
4337         switch (hw->mac.type) {
4338
4339         case ixgbe_mac_82598EB:
4340                 if (type == -1)
4341                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4342                 else
4343                         entry += (type * 64);
4344                 index = (entry >> 2) & 0x1F;
4345                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4346                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4347                 ivar |= (vector << (8 * (entry & 0x3)));
4348                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4349                 break;
4350
4351         case ixgbe_mac_82599EB:
4352         case ixgbe_mac_X540:
4353                 if (type == -1) { /* MISC IVAR */
4354                         index = (entry & 1) * 8;
4355                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4356                         ivar &= ~(0xFF << index);
4357                         ivar |= (vector << index);
4358                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4359                 } else {        /* RX/TX IVARS */
4360                         index = (16 * (entry & 1)) + (8 * type);
4361                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4362                         ivar &= ~(0xFF << index);
4363                         ivar |= (vector << index);
4364                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4365                 }
4366
4367                 break;
4368
4369         default:
4370                 break;
4371         }
4372 }
4373
4374 void __attribute__((cold))
4375 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4376 {
4377         uint16_t i, rx_using_sse;
4378         struct ixgbe_adapter *adapter =
4379                 (struct ixgbe_adapter *)dev->data->dev_private;
4380
4381         /*
4382          * In order to allow Vector Rx there are a few configuration
4383          * conditions to be met and Rx Bulk Allocation should be allowed.
4384          */
4385         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4386             !adapter->rx_bulk_alloc_allowed) {
4387                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4388                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4389                                     "not enabled",
4390                              dev->data->port_id);
4391
4392                 adapter->rx_vec_allowed = false;
4393         }
4394
4395         /*
4396          * Initialize the appropriate LRO callback.
4397          *
4398          * If all queues satisfy the bulk allocation preconditions
4399          * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4400          * Otherwise use a single allocation version.
4401          */
4402         if (dev->data->lro) {
4403                 if (adapter->rx_bulk_alloc_allowed) {
4404                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4405                                            "allocation version");
4406                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4407                 } else {
4408                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4409                                            "allocation version");
4410                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4411                 }
4412         } else if (dev->data->scattered_rx) {
4413                 /*
4414                  * Set the non-LRO scattered callback: there are Vector and
4415                  * single allocation versions.
4416                  */
4417                 if (adapter->rx_vec_allowed) {
4418                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4419                                             "callback (port=%d).",
4420                                      dev->data->port_id);
4421
4422                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4423                 } else if (adapter->rx_bulk_alloc_allowed) {
4424                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4425                                            "allocation callback (port=%d).",
4426                                      dev->data->port_id);
4427                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4428                 } else {
4429                         PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4430                                             "single allocation) "
4431                                             "Scattered Rx callback "
4432                                             "(port=%d).",
4433                                      dev->data->port_id);
4434
4435                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4436                 }
4437         /*
4438          * Below we set "simple" callbacks according to port/queues parameters.
4439          * If parameters allow we are going to choose between the following
4440          * callbacks:
4441          *    - Vector
4442          *    - Bulk Allocation
4443          *    - Single buffer allocation (the simplest one)
4444          */
4445         } else if (adapter->rx_vec_allowed) {
4446                 PMD_INIT_LOG(DEBUG, "Vector Rx enabled, please make sure the Rx "
4447                                     "burst size is no less than %d (port=%d).",
4448                              RTE_IXGBE_DESCS_PER_LOOP,
4449                              dev->data->port_id);
4450
4451                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4452         } else if (adapter->rx_bulk_alloc_allowed) {
4453                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4454                                     "satisfied. Rx Burst Bulk Alloc function "
4455                                     "will be used on port=%d.",
4456                              dev->data->port_id);
4457
4458                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4459         } else {
4460                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4461                                     "satisfied, or Scattered Rx is requested "
4462                                     "(port=%d).",
4463                              dev->data->port_id);
4464
4465                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4466         }
4467
4468         /* Propagate information about RX function choice through all queues. */
4469
4470         rx_using_sse =
4471                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4472                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4473
4474         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4475                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4476
4477                 rxq->rx_using_sse = rx_using_sse;
4478         }
4479 }
4480
4481 /**
4482  * ixgbe_set_rsc - configure RSC related port HW registers
4483  *
4484  * Configures the port's RSC related registers according to chapter 4.6.7.2
4485  * of the 82599 Spec (the x540 configuration is virtually the same).
4486  *
4487  * @dev port handle
4488  *
4489  * Returns 0 in case of success or a non-zero error code
4490  */
4491 static int
4492 ixgbe_set_rsc(struct rte_eth_dev *dev)
4493 {
4494         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4495         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4496         struct rte_eth_dev_info dev_info = { 0 };
4497         bool rsc_capable = false;
4498         uint16_t i;
4499         uint32_t rdrxctl;
4500
4501         /* Sanity check */
4502         dev->dev_ops->dev_infos_get(dev, &dev_info);
4503         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4504                 rsc_capable = true;
4505
4506         if (!rsc_capable && rx_conf->enable_lro) {
4507                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4508                                    "support it");
4509                 return -EINVAL;
4510         }
4511
4512         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4513
4514         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4515                 /*
4516                  * According to chapter 4.6.7.2.1 of the Spec Rev.
4517                  * 3.0 RSC configuration requires HW CRC stripping being
4518                  * enabled. If user requested both HW CRC stripping off
4519                  * and RSC on - return an error.
4520                  */
4521                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4522                                     "is disabled");
4523                 return -EINVAL;
4524         }
4525
4526         /* RFCTL configuration  */
4527         if (rsc_capable) {
4528                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4529
4530                 if (rx_conf->enable_lro)
4531                         /*
4532                          * Since NFS packets coalescing is not supported - clear
4533                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4534                          * enabled.
4535                          */
4536                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4537                                    IXGBE_RFCTL_NFSR_DIS);
4538                 else
4539                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4540
4541                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4542         }
4543
4544         /* If LRO hasn't been requested - we are done here. */
4545         if (!rx_conf->enable_lro)
4546                 return 0;
4547
4548         /* Set RDRXCTL.RSCACKC bit */
4549         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4550         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4551         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4552
4553         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4554         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4555                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4556                 uint32_t srrctl =
4557                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4558                 uint32_t rscctl =
4559                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4560                 uint32_t psrtype =
4561                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4562                 uint32_t eitr =
4563                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4564
4565                 /*
4566                  * ixgbe PMD doesn't support header-split at the moment.
4567                  *
4568                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4569                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4570                  * should be configured even if header split is not
4571                  * enabled. We will configure it 128 bytes following the
4572                  * recommendation in the spec.
4573                  */
4574                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4575                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4576                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4577
4578                 /*
4579                  * TODO: Consider setting the Receive Descriptor Minimum
4580                  * Threshold Size for an RSC case. This is not an obviously
4581                  * Threshold Size for the RSC case. This is not an obviously
4582                  * beneficial option, but it is worth considering...
4583
4584                 rscctl |= IXGBE_RSCCTL_RSCEN;
4585                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4586                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4587
4588                 /*
4589                  * RSC: Set ITR interval corresponding to 2K ints/s.
4590                  *
4591                  * Full-sized RSC aggregations for a 10Gb/s link will
4592                  * arrive at about 20K aggregation/s rate.
4593                  *
4594                  * A 2K ints/s rate will cause only 10% of the
4595                  * aggregations to be closed due to interrupt timer
4596                  * expiration in the wire-speed streaming case.
4597                  *
4598                  * For a sparse streaming case this setting will yield
4599                  * at most 500us latency for a single RSC aggregation.
4600                  */
4601                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4602                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4603
4604                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4605                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4606                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4607                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4608
4609                 /*
4610                  * RSC requires the mapping of the queue to the
4611                  * interrupt vector.
4612                  */
4613                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4614         }
4615
4616         dev->data->lro = 1;
4617
4618         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4619
4620         return 0;
4621 }
4622
4623 /*
4624  * Initializes Receive Unit.
4625  */
4626 int __attribute__((cold))
4627 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4628 {
4629         struct ixgbe_hw     *hw;
4630         struct ixgbe_rx_queue *rxq;
4631         uint64_t bus_addr;
4632         uint32_t rxctrl;
4633         uint32_t fctrl;
4634         uint32_t hlreg0;
4635         uint32_t maxfrs;
4636         uint32_t srrctl;
4637         uint32_t rdrxctl;
4638         uint32_t rxcsum;
4639         uint16_t buf_size;
4640         uint16_t i;
4641         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4642         int rc;
4643
4644         PMD_INIT_FUNC_TRACE();
4645         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4646
4647         /*
4648          * Make sure receives are disabled while setting
4649          * up the RX context (registers, descriptor rings, etc.).
4650          */
4651         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4652         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4653
4654         /* Enable receipt of broadcast frames */
4655         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4656         fctrl |= IXGBE_FCTRL_BAM;
4657         fctrl |= IXGBE_FCTRL_DPF;
4658         fctrl |= IXGBE_FCTRL_PMCF;
4659         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4660
4661         /*
4662          * Configure CRC stripping, if any.
4663          */
4664         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4665         if (rx_conf->hw_strip_crc)
4666                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4667         else
4668                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4669
4670         /*
4671          * Configure jumbo frame support, if any.
4672          */
4673         if (rx_conf->jumbo_frame == 1) {
4674                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
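                     /* The maximum frame size occupies the upper 16 bits of MAXFRS. */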
4675                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4676                 maxfrs &= 0x0000FFFF;
4677                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4678                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4679         } else
4680                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4681
4682         /*
4683          * If loopback mode is configured for 82599, set LPBK bit.
4684          */
4685         if (hw->mac.type == ixgbe_mac_82599EB &&
4686                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4687                 hlreg0 |= IXGBE_HLREG0_LPBK;
4688         else
4689                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4690
4691         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4692
4693         /* Setup RX queues */
4694         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4695                 rxq = dev->data->rx_queues[i];
4696
4697                 /*
4698                  * Reset crc_len in case it was changed after queue setup by a
4699                  * call to configure.
4700                  */
4701                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4702
4703                 /* Setup the Base and Length of the Rx Descriptor Rings */
4704                 bus_addr = rxq->rx_ring_phys_addr;
4705                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4706                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4707                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4708                                 (uint32_t)(bus_addr >> 32));
4709                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4710                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4711                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4712                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4713
4714                 /* Configure the SRRCTL register */
4715 #ifdef RTE_HEADER_SPLIT_ENABLE
4716                 /*
4717                  * Configure Header Split
4718                  */
4719                 if (rx_conf->header_split) {
4720                         if (hw->mac.type == ixgbe_mac_82599EB) {
4721                                 /* Must setup the PSRTYPE register */
4722                                 uint32_t psrtype;
4723
4724                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4725                                         IXGBE_PSRTYPE_UDPHDR   |
4726                                         IXGBE_PSRTYPE_IPV4HDR  |
4727                                         IXGBE_PSRTYPE_IPV6HDR;
4728                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4729                         }
4730                         srrctl = ((rx_conf->split_hdr_size <<
4731                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4732                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4733                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4734                 } else
4735 #endif
4736                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4737
4738                 /* Set if packets are dropped when no descriptors are available */
4739                 if (rxq->drop_en)
4740                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4741
4742                 /*
4743                  * Configure the RX buffer size in the BSIZEPACKET field of
4744                  * the SRRCTL register of the queue.
4745                  * The value is in 1 KB resolution. Valid values can be from
4746                  * 1 KB to 16 KB.
4747                  */
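                     /*
                      * E.g. a typical 2 KB data area (after headroom) programs
                      * BSIZEPKT = 2, i.e. a 2 KB receive buffer per descriptor.
                      */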
4748                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4749                         RTE_PKTMBUF_HEADROOM);
4750                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4751                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4752
4753                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4754
4755                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4756                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4757
4758                 /* Add dual VLAN tag length to support dual VLAN frames */
4759                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4760                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4761                         dev->data->scattered_rx = 1;
4762         }
4763
4764         if (rx_conf->enable_scatter)
4765                 dev->data->scattered_rx = 1;
4766
4767         /*
4768          * Device configured with multiple RX queues.
4769          */
4770         ixgbe_dev_mq_rx_configure(dev);
4771
4772         /*
4773          * Setup the Checksum Register.
4774          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4775          * Enable IP/L4 checksum computation by hardware if requested to do so.
4776          */
4777         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4778         rxcsum |= IXGBE_RXCSUM_PCSD;
4779         if (rx_conf->hw_ip_checksum)
4780                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4781         else
4782                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4783
4784         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4785
4786         if (hw->mac.type == ixgbe_mac_82599EB ||
4787             hw->mac.type == ixgbe_mac_X540) {
4788                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4789                 if (rx_conf->hw_strip_crc)
4790                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4791                 else
4792                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
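                     /* RSCFRSTSIZE is expected to be cleared by software
                      * (see the 82599/X540 datasheet).
                      */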
4793                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4794                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4795         }
4796
4797         rc = ixgbe_set_rsc(dev);
4798         if (rc)
4799                 return rc;
4800
4801         ixgbe_set_rx_function(dev);
4802
4803         return 0;
4804 }
4805
4806 /*
4807  * Initializes Transmit Unit.
4808  */
4809 void __attribute__((cold))
4810 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4811 {
4812         struct ixgbe_hw     *hw;
4813         struct ixgbe_tx_queue *txq;
4814         uint64_t bus_addr;
4815         uint32_t hlreg0;
4816         uint32_t txctrl;
4817         uint16_t i;
4818
4819         PMD_INIT_FUNC_TRACE();
4820         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4821
4822         /* Enable TX CRC (checksum offload requirement) and hw padding
4823          * (TSO requirement)
4824          */
4825         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4826         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4827         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4828
4829         /* Setup the Base and Length of the Tx Descriptor Rings */
4830         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4831                 txq = dev->data->tx_queues[i];
4832
4833                 bus_addr = txq->tx_ring_phys_addr;
4834                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4835                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4836                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4837                                 (uint32_t)(bus_addr >> 32));
4838                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4839                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4840                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4841                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4842                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4843
4844                 /*
4845                  * Disable Tx Head Writeback RO bit, since this hoses
4846                  * bookkeeping if things aren't delivered in order.
4847                  */
4848                 switch (hw->mac.type) {
4849                 case ixgbe_mac_82598EB:
4850                         txctrl = IXGBE_READ_REG(hw,
4851                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4852                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4853                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4854                                         txctrl);
4855                         break;
4856
4857                 case ixgbe_mac_82599EB:
4858                 case ixgbe_mac_X540:
4859                 case ixgbe_mac_X550:
4860                 case ixgbe_mac_X550EM_x:
4861                 case ixgbe_mac_X550EM_a:
4862                 default:
4863                         txctrl = IXGBE_READ_REG(hw,
4864                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4865                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4866                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4867                                         txctrl);
4868                         break;
4869                 }
4870         }
4871
4872         /* Device configured with multiple TX queues. */
4873         ixgbe_dev_mq_tx_configure(dev);
4874 }
4875
4876 /*
4877  * Set up link for 82599 loopback mode Tx->Rx.
4878  */
4879 static inline void __attribute__((cold))
4880 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4881 {
4882         PMD_INIT_FUNC_TRACE();
4883
4884         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4885                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4886                                 IXGBE_SUCCESS) {
4887                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4888                         /* ignore error */
4889                         return;
4890                 }
4891         }
4892
4893         /* Restart link */
4894         IXGBE_WRITE_REG(hw,
4895                         IXGBE_AUTOC,
4896                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4897         ixgbe_reset_pipeline_82599(hw);
4898
4899         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4900         msec_delay(50);
4901 }
4902
4903
4904 /*
4905  * Start Transmit and Receive Units.
4906  */
4907 int __attribute__((cold))
4908 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4909 {
4910         struct ixgbe_hw     *hw;
4911         struct ixgbe_tx_queue *txq;
4912         struct ixgbe_rx_queue *rxq;
4913         uint32_t txdctl;
4914         uint32_t dmatxctl;
4915         uint32_t rxctrl;
4916         uint16_t i;
4917         int ret = 0;
4918
4919         PMD_INIT_FUNC_TRACE();
4920         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4921
4922         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4923                 txq = dev->data->tx_queues[i];
4924                 /* Setup Transmit Threshold Registers */
4925                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4926                 txdctl |= txq->pthresh & 0x7F;
4927                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4928                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4929                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4930         }
4931
4932         if (hw->mac.type != ixgbe_mac_82598EB) {
4933                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4934                 dmatxctl |= IXGBE_DMATXCTL_TE;
4935                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4936         }
4937
4938         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4939                 txq = dev->data->tx_queues[i];
4940                 if (!txq->tx_deferred_start) {
4941                         ret = ixgbe_dev_tx_queue_start(dev, i);
4942                         if (ret < 0)
4943                                 return ret;
4944                 }
4945         }
4946
4947         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4948                 rxq = dev->data->rx_queues[i];
4949                 if (!rxq->rx_deferred_start) {
4950                         ret = ixgbe_dev_rx_queue_start(dev, i);
4951                         if (ret < 0)
4952                                 return ret;
4953                 }
4954         }
4955
4956         /* Enable Receive engine */
4957         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4958         if (hw->mac.type == ixgbe_mac_82598EB)
4959                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4960         rxctrl |= IXGBE_RXCTRL_RXEN;
4961         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4962
4963         /* If loopback mode is enabled for 82599, set up the link accordingly */
4964         if (hw->mac.type == ixgbe_mac_82599EB &&
4965                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4966                 ixgbe_setup_loopback_link_82599(hw);
4967
4968         return 0;
4969 }
4970
4971 /*
4972  * Start Receive Units for specified queue.
4973  */
4974 int __attribute__((cold))
4975 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
4976 {
4977         struct ixgbe_hw     *hw;
4978         struct ixgbe_rx_queue *rxq;
4979         uint32_t rxdctl;
4980         int poll_ms;
4981
4982         PMD_INIT_FUNC_TRACE();
4983         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4984
4985         if (rx_queue_id < dev->data->nb_rx_queues) {
4986                 rxq = dev->data->rx_queues[rx_queue_id];
4987
4988                 /* Allocate buffers for descriptor rings */
4989                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
4990                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
4991                                      rx_queue_id);
4992                         return -1;
4993                 }
4994                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
4995                 rxdctl |= IXGBE_RXDCTL_ENABLE;
4996                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
4997
4998                 /* Wait until RX Enable ready */
4999                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5000                 do {
5001                         rte_delay_ms(1);
5002                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5003                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5004                 if (!poll_ms)
5005                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5006                                      rx_queue_id);
5007                 rte_wmb();
5008                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5009                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5010                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5011         } else
5012                 return -1;
5013
5014         return 0;
5015 }
5016
5017 /*
5018  * Stop Receive Units for specified queue.
5019  */
5020 int __attribute__((cold))
5021 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5022 {
5023         struct ixgbe_hw     *hw;
5024         struct ixgbe_adapter *adapter =
5025                 (struct ixgbe_adapter *)dev->data->dev_private;
5026         struct ixgbe_rx_queue *rxq;
5027         uint32_t rxdctl;
5028         int poll_ms;
5029
5030         PMD_INIT_FUNC_TRACE();
5031         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5032
5033         if (rx_queue_id < dev->data->nb_rx_queues) {
5034                 rxq = dev->data->rx_queues[rx_queue_id];
5035
5036                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5037                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5038                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5039
5040                 /* Wait until the RX Enable bit is cleared */
5041                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5042                 do {
5043                         rte_delay_ms(1);
5044                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5045                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5046                 if (!poll_ms)
5047                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5048                                      rx_queue_id);
5049
5050                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5051
5052                 ixgbe_rx_queue_release_mbufs(rxq);
5053                 ixgbe_reset_rx_queue(adapter, rxq);
5054                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5055         } else
5056                 return -1;
5057
5058         return 0;
5059 }
5060
5061
5062 /*
5063  * Start Transmit Units for specified queue.
5064  */
5065 int __attribute__((cold))
5066 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5067 {
5068         struct ixgbe_hw     *hw;
5069         struct ixgbe_tx_queue *txq;
5070         uint32_t txdctl;
5071         int poll_ms;
5072
5073         PMD_INIT_FUNC_TRACE();
5074         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5075
5076         if (tx_queue_id < dev->data->nb_tx_queues) {
5077                 txq = dev->data->tx_queues[tx_queue_id];
5078                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5079                 txdctl |= IXGBE_TXDCTL_ENABLE;
5080                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5081
5082                 /* Wait until the TX Enable bit is set */
5083                 if (hw->mac.type == ixgbe_mac_82599EB) {
5084                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5085                         do {
5086                                 rte_delay_ms(1);
5087                                 txdctl = IXGBE_READ_REG(hw,
5088                                         IXGBE_TXDCTL(txq->reg_idx));
5089                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5090                         if (!poll_ms)
5091                                 PMD_INIT_LOG(ERR, "Could not enable "
5092                                              "Tx Queue %d", tx_queue_id);
5093                 }
5094                 rte_wmb();
5095                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5096                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5097                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5098         } else
5099                 return -1;
5100
5101         return 0;
5102 }
5103
5104 /*
5105  * Stop Transmit Units for specified queue.
5106  */
5107 int __attribute__((cold))
5108 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5109 {
5110         struct ixgbe_hw     *hw;
5111         struct ixgbe_tx_queue *txq;
5112         uint32_t txdctl;
5113         uint32_t txtdh, txtdt;
5114         int poll_ms;
5115
5116         PMD_INIT_FUNC_TRACE();
5117         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5118
5119         if (tx_queue_id >= dev->data->nb_tx_queues)
5120                 return -1;
5121
5122         txq = dev->data->tx_queues[tx_queue_id];
5123
5124         /* Wait until TX queue is empty */
5125         if (hw->mac.type == ixgbe_mac_82599EB) {
5126                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5127                 do {
5128                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5129                         txtdh = IXGBE_READ_REG(hw,
5130                                                IXGBE_TDH(txq->reg_idx));
5131                         txtdt = IXGBE_READ_REG(hw,
5132                                                IXGBE_TDT(txq->reg_idx));
5133                 } while (--poll_ms && (txtdh != txtdt));
5134                 if (!poll_ms)
5135                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5136                                      "when stopping.", tx_queue_id);
5137         }
5138
5139         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5140         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5141         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5142
5143         /* Wait until the TX Enable bit is cleared */
5144         if (hw->mac.type == ixgbe_mac_82599EB) {
5145                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5146                 do {
5147                         rte_delay_ms(1);
5148                         txdctl = IXGBE_READ_REG(hw,
5149                                                 IXGBE_TXDCTL(txq->reg_idx));
5150                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5151                 if (!poll_ms)
5152                         PMD_INIT_LOG(ERR, "Could not disable "
5153                                      "Tx Queue %d", tx_queue_id);
5154         }
5155
5156         if (txq->ops != NULL) {
5157                 txq->ops->release_mbufs(txq);
5158                 txq->ops->reset(txq);
5159         }
5160         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5161
5162         return 0;
5163 }
5164
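/*
 * Get information about the specified Rx queue.
 */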
5165 void
5166 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5167         struct rte_eth_rxq_info *qinfo)
5168 {
5169         struct ixgbe_rx_queue *rxq;
5170
5171         rxq = dev->data->rx_queues[queue_id];
5172
5173         qinfo->mp = rxq->mb_pool;
5174         qinfo->scattered_rx = dev->data->scattered_rx;
5175         qinfo->nb_desc = rxq->nb_rx_desc;
5176
5177         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5178         qinfo->conf.rx_drop_en = rxq->drop_en;
5179         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5180 }
5181
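/*
 * Get information about the specified Tx queue.
 */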
5182 void
5183 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5184         struct rte_eth_txq_info *qinfo)
5185 {
5186         struct ixgbe_tx_queue *txq;
5187
5188         txq = dev->data->tx_queues[queue_id];
5189
5190         qinfo->nb_desc = txq->nb_tx_desc;
5191
5192         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5193         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5194         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5195
5196         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5197         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5198         qinfo->conf.txq_flags = txq->txq_flags;
5199         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5200 }
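
/*
 * Illustrative usage sketch added by the editor; it is not part of the
 * driver. It reads back, through the generic ethdev query API, the queue
 * configuration filled in by the two helpers above. It assumes the DPDK 16.x
 * ethdev API, where port IDs are 8-bit; the function name is illustrative
 * only.
 */
static void __attribute__((unused))
example_dump_queue_info(uint8_t port_id, uint16_t queue_id)
{
        struct rte_eth_rxq_info rx_info;
        struct rte_eth_txq_info tx_info;

        if (rte_eth_rx_queue_info_get(port_id, queue_id, &rx_info) == 0)
                PMD_INIT_LOG(DEBUG, "Rx queue %u: %u descs, free_thresh %u",
                             queue_id, rx_info.nb_desc,
                             rx_info.conf.rx_free_thresh);

        if (rte_eth_tx_queue_info_get(port_id, queue_id, &tx_info) == 0)
                PMD_INIT_LOG(DEBUG, "Tx queue %u: %u descs, rs_thresh %u",
                             queue_id, tx_info.nb_desc,
                             tx_info.conf.tx_rs_thresh);
}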
5201
5202 /*
5203  * [VF] Initializes Receive Unit.
5204  */
5205 int __attribute__((cold))
5206 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5207 {
5208         struct ixgbe_hw     *hw;
5209         struct ixgbe_rx_queue *rxq;
5210         uint64_t bus_addr;
5211         uint32_t srrctl, psrtype = 0;
5212         uint16_t buf_size;
5213         uint16_t i;
5214         int ret;
5215
5216         PMD_INIT_FUNC_TRACE();
5217         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5218
5219         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5220                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5221                         "it must be a power of 2");
5222                 return -1;
5223         }
5224
5225         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5226                 PMD_INIT_LOG(ERR, "Invalid number of Rx queues, "
5227                         "it must be less than or equal to %d",
5228                         hw->mac.max_rx_queues);
5229                 return -1;
5230         }
5231
5232         /*
5233          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5234          * disables VF receipt of packets if the PF MTU is > 1500.
5235          * This is done to deal with a limitation of the 82599, which requires
5236          * the PF and all VFs to share the same MTU.
5237          * The PF driver then re-enables VF packet receipt when the VF driver
5238          * issues an IXGBE_VF_SET_LPE request.
5239          * In the meantime, the VF device cannot be used, even if the VF driver
5240          * and the Guest VM network stack are ready to accept packets with a
5241          * size up to the PF MTU.
5242          * As a work-around to this PF behaviour, force the call to
5243          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5244          * VF packet reception works in all cases.
5245          */
5246         ixgbevf_rlpml_set_vf(hw,
5247                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5248
5249         /* Setup RX queues */
5250         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5251                 rxq = dev->data->rx_queues[i];
5252
5253                 /* Allocate buffers for descriptor rings */
5254                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5255                 if (ret)
5256                         return ret;
5257
5258                 /* Setup the Base and Length of the Rx Descriptor Rings */
5259                 bus_addr = rxq->rx_ring_phys_addr;
5260
5261                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5262                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5263                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5264                                 (uint32_t)(bus_addr >> 32));
5265                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5266                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5267                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5268                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5269
5270
5271                 /* Configure the SRRCTL register */
5272 #ifdef RTE_HEADER_SPLIT_ENABLE
5273                 /*
5274                  * Configure Header Split
5275                  */
5276                 if (dev->data->dev_conf.rxmode.header_split) {
5277                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5278                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5279                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5280                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5281                 } else
5282 #endif
5283                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5284
5285                 /* Set whether packets are dropped when no descriptors are available */
5286                 if (rxq->drop_en)
5287                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5288
5289                 /*
5290                  * Configure the RX buffer size in the BSIZEPACKET field of
5291                  * the SRRCTL register of the queue.
5292                  * The value is in 1 KB resolution. Valid values can be from
5293                  * 1 KB to 16 KB.
5294                  */
5295                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5296                         RTE_PKTMBUF_HEADROOM);
5297                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5298                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5299
5300                 /*
5301                  * VF modification to write virtual function SRRCTL register
5302                  */
5303                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5304
5305                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5306                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5307
5308                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5309                     /* Account for two VLAN tags to support dual VLAN */
5310                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5311                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5312                         if (!dev->data->scattered_rx)
5313                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5314                         dev->data->scattered_rx = 1;
5315                 }
5316         }
5317
5318 #ifdef RTE_HEADER_SPLIT_ENABLE
5319         if (dev->data->dev_conf.rxmode.header_split)
5320                 /* Must setup the PSRTYPE register */
5321                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5322                         IXGBE_PSRTYPE_UDPHDR   |
5323                         IXGBE_PSRTYPE_IPV4HDR  |
5324                         IXGBE_PSRTYPE_IPV6HDR;
5325 #endif
5326
5327         /* Set RQPL for VF RSS according to the number of Rx queues */
5328         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5329                 IXGBE_PSRTYPE_RQPL_SHIFT;
5330         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5331
5332         ixgbe_set_rx_function(dev);
5333
5334         return 0;
5335 }
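
/*
 * Illustrative sketch added by the editor; it is not part of the driver.
 * It shows the effective Rx buffer size the hardware ends up with after the
 * 1 KB BSIZEPACKET rounding applied above: e.g. a mempool with a 2048-byte
 * data room and a 128-byte headroom leaves 1920 usable bytes, which rounds
 * down to 1 KB. The function name is illustrative only.
 */
static uint16_t __attribute__((unused))
example_effective_rx_buf_size(struct rte_mempool *mp)
{
        uint16_t buf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) -
                                       RTE_PKTMBUF_HEADROOM);

        /* Round down to the 1 KB resolution of SRRCTL.BSIZEPACKET. */
        return (uint16_t)((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) <<
                          IXGBE_SRRCTL_BSIZEPKT_SHIFT);
}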
5336
5337 /*
5338  * [VF] Initializes Transmit Unit.
5339  */
5340 void __attribute__((cold))
5341 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5342 {
5343         struct ixgbe_hw     *hw;
5344         struct ixgbe_tx_queue *txq;
5345         uint64_t bus_addr;
5346         uint32_t txctrl;
5347         uint16_t i;
5348
5349         PMD_INIT_FUNC_TRACE();
5350         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5351
5352         /* Setup the Base and Length of the Tx Descriptor Rings */
5353         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5354                 txq = dev->data->tx_queues[i];
5355                 bus_addr = txq->tx_ring_phys_addr;
5356                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5357                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5358                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5359                                 (uint32_t)(bus_addr >> 32));
5360                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5361                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5362                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5363                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5364                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5365
5366                 /*
5367                  * Disable the Tx Head Writeback RO (relaxed ordering) bit,
5368                  * since out-of-order completion breaks descriptor bookkeeping.
5369                  */
5370                 txctrl = IXGBE_READ_REG(hw,
5371                                 IXGBE_VFDCA_TXCTRL(i));
5372                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5373                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5374                                 txctrl);
5375         }
5376 }
5377
5378 /*
5379  * [VF] Start Transmit and Receive Units.
5380  */
5381 void __attribute__((cold))
5382 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5383 {
5384         struct ixgbe_hw     *hw;
5385         struct ixgbe_tx_queue *txq;
5386         struct ixgbe_rx_queue *rxq;
5387         uint32_t txdctl;
5388         uint32_t rxdctl;
5389         uint16_t i;
5390         int poll_ms;
5391
5392         PMD_INIT_FUNC_TRACE();
5393         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5394
5395         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5396                 txq = dev->data->tx_queues[i];
5397                 /* Setup Transmit Threshold Registers */
5398                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5399                 txdctl |= txq->pthresh & 0x7F;
5400                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5401                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5402                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5403         }
5404
5405         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5406
5407                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5408                 txdctl |= IXGBE_TXDCTL_ENABLE;
5409                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5410
5411                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5412                 /* Wait until the TX Enable bit is set */
5413                 do {
5414                         rte_delay_ms(1);
5415                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5416                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5417                 if (!poll_ms)
5418                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5419         }
5420         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5421
5422                 rxq = dev->data->rx_queues[i];
5423
5424                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5425                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5426                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5427
5428                 /* Wait until the RX Enable bit is set */
5429                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5430                 do {
5431                         rte_delay_ms(1);
5432                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5433                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5434                 if (!poll_ms)
5435                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5436                 rte_wmb();
5437                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5438
5439         }
5440 }
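
/*
 * Illustrative sketch added by the editor; it is not part of the driver.
 * It factors out the poll-with-timeout pattern used in the queue start/stop
 * paths above: poll until all bits in 'mask' read as 'expected', or give up
 * after roughly 'timeout_ms' milliseconds. The function name is illustrative
 * only.
 */
static int __attribute__((unused))
example_poll_reg_bits(struct ixgbe_hw *hw, uint32_t reg, uint32_t mask,
                      uint32_t expected, int timeout_ms)
{
        uint32_t val;

        do {
                rte_delay_ms(1);
                val = IXGBE_READ_REG(hw, reg);
                if ((val & mask) == expected)
                        return 0;
        } while (--timeout_ms > 0);

        return -1;
}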
5441
5442 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5443 int __attribute__((weak))
5444 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5445 {
5446         return -1;
5447 }
5448
5449 uint16_t __attribute__((weak))
5450 ixgbe_recv_pkts_vec(
5451         void __rte_unused *rx_queue,
5452         struct rte_mbuf __rte_unused **rx_pkts,
5453         uint16_t __rte_unused nb_pkts)
5454 {
5455         return 0;
5456 }
5457
5458 uint16_t __attribute__((weak))
5459 ixgbe_recv_scattered_pkts_vec(
5460         void __rte_unused *rx_queue,
5461         struct rte_mbuf __rte_unused **rx_pkts,
5462         uint16_t __rte_unused nb_pkts)
5463 {
5464         return 0;
5465 }
5466
5467 int __attribute__((weak))
5468 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5469 {
5470         return -1;
5471 }