drivers/net: do not touch mbuf next or nb segs on Rx
[dpdk.git] / drivers / net / ixgbe / ixgbe_rxtx.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   Copyright 2014 6WIND S.A.
6  *   All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include <sys/queue.h>
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <errno.h>
41 #include <stdint.h>
42 #include <stdarg.h>
43 #include <unistd.h>
44 #include <inttypes.h>
45
46 #include <rte_byteorder.h>
47 #include <rte_common.h>
48 #include <rte_cycles.h>
49 #include <rte_log.h>
50 #include <rte_debug.h>
51 #include <rte_interrupts.h>
52 #include <rte_pci.h>
53 #include <rte_memory.h>
54 #include <rte_memzone.h>
55 #include <rte_launch.h>
56 #include <rte_eal.h>
57 #include <rte_per_lcore.h>
58 #include <rte_lcore.h>
59 #include <rte_atomic.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_mempool.h>
62 #include <rte_malloc.h>
63 #include <rte_mbuf.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_prefetch.h>
67 #include <rte_udp.h>
68 #include <rte_tcp.h>
69 #include <rte_sctp.h>
70 #include <rte_string_fns.h>
71 #include <rte_errno.h>
72 #include <rte_ip.h>
73 #include <rte_net.h>
74
75 #include "ixgbe_logs.h"
76 #include "base/ixgbe_api.h"
77 #include "base/ixgbe_vf.h"
78 #include "ixgbe_ethdev.h"
79 #include "base/ixgbe_dcb.h"
80 #include "base/ixgbe_common.h"
81 #include "ixgbe_rxtx.h"
82
83 #ifdef RTE_LIBRTE_IEEE1588
84 #define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
85 #else
86 #define IXGBE_TX_IEEE1588_TMST 0
87 #endif
88 /* Bit Mask to indicate what bits required for building TX context */
89 #define IXGBE_TX_OFFLOAD_MASK (                  \
90                 PKT_TX_VLAN_PKT |                \
91                 PKT_TX_IP_CKSUM |                \
92                 PKT_TX_L4_MASK |                 \
93                 PKT_TX_TCP_SEG |                 \
94                 PKT_TX_MACSEC |                  \
95                 PKT_TX_OUTER_IP_CKSUM |          \
96                 IXGBE_TX_IEEE1588_TMST)
97
98 #define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
99                 (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
100
101 #if 1
102 #define RTE_PMD_USE_PREFETCH
103 #endif
104
105 #ifdef RTE_PMD_USE_PREFETCH
106 /*
107  * Prefetch a cache line into all cache levels.
108  */
109 #define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
110 #else
111 #define rte_ixgbe_prefetch(p)   do {} while (0)
112 #endif
113
114 #ifdef RTE_IXGBE_INC_VECTOR
115 uint16_t ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
116                                     uint16_t nb_pkts);
117 #endif
118
119 /*********************************************************************
120  *
121  *  TX functions
122  *
123  **********************************************************************/
124
125 /*
126  * Check for descriptors with their DD bit set and free mbufs.
127  * Return the total number of buffers freed.
128  */
129 static inline int __attribute__((always_inline))
130 ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
131 {
132         struct ixgbe_tx_entry *txep;
133         uint32_t status;
134         int i, nb_free = 0;
135         struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
136
137         /* check DD bit on threshold descriptor */
138         status = txq->tx_ring[txq->tx_next_dd].wb.status;
139         if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
140                 return 0;
141
142         /*
143          * first buffer to free from S/W ring is at index
144          * tx_next_dd - (tx_rs_thresh-1)
145          */
146         txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
147
148         for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
149                 /* free buffers one at a time */
150                 m = rte_pktmbuf_prefree_seg(txep->mbuf);
151                 txep->mbuf = NULL;
152
153                 if (unlikely(m == NULL))
154                         continue;
155
156                 if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
157                     (nb_free > 0 && m->pool != free[0]->pool)) {
158                         rte_mempool_put_bulk(free[0]->pool,
159                                              (void **)free, nb_free);
160                         nb_free = 0;
161                 }
162
163                 free[nb_free++] = m;
164         }
165
166         if (nb_free > 0)
167                 rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
168
169         /* buffers were freed, update counters */
170         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
171         txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
172         if (txq->tx_next_dd >= txq->nb_tx_desc)
173                 txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
174
175         return txq->tx_rs_thresh;
176 }
177
178 /* Populate 4 descriptors with data from 4 mbufs */
179 static inline void
180 tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
181 {
182         uint64_t buf_dma_addr;
183         uint32_t pkt_len;
184         int i;
185
186         for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
187                 buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
188                 pkt_len = (*pkts)->data_len;
189
190                 /* write data to descriptor */
191                 txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
192
193                 txdp->read.cmd_type_len =
194                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
195
196                 txdp->read.olinfo_status =
197                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
198
199                 rte_prefetch0(&(*pkts)->pool);
200         }
201 }
202
203 /* Populate 1 descriptor with data from 1 mbuf */
204 static inline void
205 tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
206 {
207         uint64_t buf_dma_addr;
208         uint32_t pkt_len;
209
210         buf_dma_addr = rte_mbuf_data_dma_addr(*pkts);
211         pkt_len = (*pkts)->data_len;
212
213         /* write data to descriptor */
214         txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
215         txdp->read.cmd_type_len =
216                         rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
217         txdp->read.olinfo_status =
218                         rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
219         rte_prefetch0(&(*pkts)->pool);
220 }
221
222 /*
223  * Fill H/W descriptor ring with mbuf data.
224  * Copy mbuf pointers to the S/W ring.
225  */
226 static inline void
227 ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
228                       uint16_t nb_pkts)
229 {
230         volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
231         struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
232         const int N_PER_LOOP = 4;
233         const int N_PER_LOOP_MASK = N_PER_LOOP-1;
234         int mainpart, leftover;
235         int i, j;
236
237         /*
238          * Process most of the packets in chunks of N pkts.  Any
239          * leftover packets will get processed one at a time.
240          */
241         mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
242         leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
243         for (i = 0; i < mainpart; i += N_PER_LOOP) {
244                 /* Copy N mbuf pointers to the S/W ring */
245                 for (j = 0; j < N_PER_LOOP; ++j) {
246                         (txep + i + j)->mbuf = *(pkts + i + j);
247                 }
248                 tx4(txdp + i, pkts + i);
249         }
250
251         if (unlikely(leftover > 0)) {
252                 for (i = 0; i < leftover; ++i) {
253                         (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
254                         tx1(txdp + mainpart + i, pkts + mainpart + i);
255                 }
256         }
257 }
258
259 static inline uint16_t
260 tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
261              uint16_t nb_pkts)
262 {
263         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
264         volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
265         uint16_t n = 0;
266
267         /*
268          * Begin scanning the H/W ring for done descriptors when the
269          * number of available descriptors drops below tx_free_thresh.  For
270          * each done descriptor, free the associated buffer.
271          */
272         if (txq->nb_tx_free < txq->tx_free_thresh)
273                 ixgbe_tx_free_bufs(txq);
274
275         /* Only use descriptors that are available */
276         nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
277         if (unlikely(nb_pkts == 0))
278                 return 0;
279
280         /* Use exactly nb_pkts descriptors */
281         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
282
283         /*
284          * At this point, we know there are enough descriptors in the
285          * ring to transmit all the packets.  This assumes that each
286          * mbuf contains a single segment, and that no new offloads
287          * are expected, which would require a new context descriptor.
288          */
289
290         /*
291          * See if we're going to wrap-around. If so, handle the top
292          * of the descriptor ring first, then do the bottom.  If not,
293          * the processing looks just like the "bottom" part anyway...
294          */
295         if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
296                 n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
297                 ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
298
299                 /*
300                  * We know that the last descriptor in the ring will need to
301                  * have its RS bit set because tx_rs_thresh has to be
302                  * a divisor of the ring size
303                  */
304                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
305                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
306                 txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
307
308                 txq->tx_tail = 0;
309         }
310
311         /* Fill H/W descriptor ring with mbuf data */
312         ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
313         txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
314
315         /*
316          * Determine if RS bit should be set
317          * This is what we actually want:
318          *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
319          * but instead of subtracting 1 and doing >=, we can just do
320          * greater than without subtracting.
321          */
322         if (txq->tx_tail > txq->tx_next_rs) {
323                 tx_r[txq->tx_next_rs].read.cmd_type_len |=
324                         rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
325                 txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
326                                                 txq->tx_rs_thresh);
327                 if (txq->tx_next_rs >= txq->nb_tx_desc)
328                         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
329         }
330
331         /*
332          * Check for wrap-around. This would only happen if we used
333          * up to the last descriptor in the ring, no more, no less.
334          */
335         if (txq->tx_tail >= txq->nb_tx_desc)
336                 txq->tx_tail = 0;
337
338         /* update tail pointer */
339         rte_wmb();
340         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
341
342         return nb_pkts;
343 }
344
345 uint16_t
346 ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
347                        uint16_t nb_pkts)
348 {
349         uint16_t nb_tx;
350
351         /* Try to transmit at least chunks of TX_MAX_BURST pkts */
352         if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
353                 return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
354
355         /* transmit more than the max burst, in chunks of TX_MAX_BURST */
356         nb_tx = 0;
357         while (nb_pkts) {
358                 uint16_t ret, n;
359
360                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
361                 ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
362                 nb_tx = (uint16_t)(nb_tx + ret);
363                 nb_pkts = (uint16_t)(nb_pkts - ret);
364                 if (ret < n)
365                         break;
366         }
367
368         return nb_tx;
369 }
370
371 #ifdef RTE_IXGBE_INC_VECTOR
372 static uint16_t
373 ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
374                     uint16_t nb_pkts)
375 {
376         uint16_t nb_tx = 0;
377         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
378
379         while (nb_pkts) {
380                 uint16_t ret, num;
381
382                 num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
383                 ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
384                                                  num);
385                 nb_tx += ret;
386                 nb_pkts -= ret;
387                 if (ret < num)
388                         break;
389         }
390
391         return nb_tx;
392 }
393 #endif
394
395 static inline void
396 ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
397                 volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
398                 uint64_t ol_flags, union ixgbe_tx_offload tx_offload)
399 {
400         uint32_t type_tucmd_mlhl;
401         uint32_t mss_l4len_idx = 0;
402         uint32_t ctx_idx;
403         uint32_t vlan_macip_lens;
404         union ixgbe_tx_offload tx_offload_mask;
405         uint32_t seqnum_seed = 0;
406
407         ctx_idx = txq->ctx_curr;
408         tx_offload_mask.data[0] = 0;
409         tx_offload_mask.data[1] = 0;
410         type_tucmd_mlhl = 0;
411
412         /* Specify which HW CTX to upload. */
413         mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
414
415         if (ol_flags & PKT_TX_VLAN_PKT) {
416                 tx_offload_mask.vlan_tci |= ~0;
417         }
418
419         /* check if TCP segmentation required for this packet */
420         if (ol_flags & PKT_TX_TCP_SEG) {
421                 /* implies IP cksum in IPv4 */
422                 if (ol_flags & PKT_TX_IP_CKSUM)
423                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
424                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
425                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
426                 else
427                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
428                                 IXGBE_ADVTXD_TUCMD_L4T_TCP |
429                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
430
431                 tx_offload_mask.l2_len |= ~0;
432                 tx_offload_mask.l3_len |= ~0;
433                 tx_offload_mask.l4_len |= ~0;
434                 tx_offload_mask.tso_segsz |= ~0;
435                 mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
436                 mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
437         } else { /* no TSO, check if hardware checksum is needed */
438                 if (ol_flags & PKT_TX_IP_CKSUM) {
439                         type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
440                         tx_offload_mask.l2_len |= ~0;
441                         tx_offload_mask.l3_len |= ~0;
442                 }
443
444                 switch (ol_flags & PKT_TX_L4_MASK) {
445                 case PKT_TX_UDP_CKSUM:
446                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
447                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
448                         mss_l4len_idx |= sizeof(struct udp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
449                         tx_offload_mask.l2_len |= ~0;
450                         tx_offload_mask.l3_len |= ~0;
451                         break;
452                 case PKT_TX_TCP_CKSUM:
453                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
454                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
455                         mss_l4len_idx |= sizeof(struct tcp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
456                         tx_offload_mask.l2_len |= ~0;
457                         tx_offload_mask.l3_len |= ~0;
458                         break;
459                 case PKT_TX_SCTP_CKSUM:
460                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
461                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
462                         mss_l4len_idx |= sizeof(struct sctp_hdr) << IXGBE_ADVTXD_L4LEN_SHIFT;
463                         tx_offload_mask.l2_len |= ~0;
464                         tx_offload_mask.l3_len |= ~0;
465                         break;
466                 default:
467                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
468                                 IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
469                         break;
470                 }
471         }
472
473         if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
474                 tx_offload_mask.outer_l2_len |= ~0;
475                 tx_offload_mask.outer_l3_len |= ~0;
476                 tx_offload_mask.l2_len |= ~0;
477                 seqnum_seed |= tx_offload.outer_l3_len
478                                << IXGBE_ADVTXD_OUTER_IPLEN;
479                 seqnum_seed |= tx_offload.l2_len
480                                << IXGBE_ADVTXD_TUNNEL_LEN;
481         }
482
483         txq->ctx_cache[ctx_idx].flags = ol_flags;
484         txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
485                 tx_offload_mask.data[0] & tx_offload.data[0];
486         txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
487                 tx_offload_mask.data[1] & tx_offload.data[1];
488         txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
489
490         ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
491         vlan_macip_lens = tx_offload.l3_len;
492         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
493                 vlan_macip_lens |= (tx_offload.outer_l2_len <<
494                                     IXGBE_ADVTXD_MACLEN_SHIFT);
495         else
496                 vlan_macip_lens |= (tx_offload.l2_len <<
497                                     IXGBE_ADVTXD_MACLEN_SHIFT);
498         vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
499         ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
500         ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
501         ctx_txd->seqnum_seed     = seqnum_seed;
502 }
503
504 /*
505  * Check which hardware context can be used. Use the existing match
506  * or create a new context descriptor.
507  */
508 static inline uint32_t
509 what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
510                    union ixgbe_tx_offload tx_offload)
511 {
512         /* If match with the current used context */
513         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
514                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
515                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
516                      & tx_offload.data[0])) &&
517                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
518                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
519                      & tx_offload.data[1]))))
520                 return txq->ctx_curr;
521
522         /* What if match with the next context  */
523         txq->ctx_curr ^= 1;
524         if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
525                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
526                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
527                      & tx_offload.data[0])) &&
528                    (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
529                     (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
530                      & tx_offload.data[1]))))
531                 return txq->ctx_curr;
532
533         /* Mismatch, use the previous context */
534         return IXGBE_CTX_NUM;
535 }
536
537 static inline uint32_t
538 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
539 {
540         uint32_t tmp = 0;
541
542         if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
543                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
544         if (ol_flags & PKT_TX_IP_CKSUM)
545                 tmp |= IXGBE_ADVTXD_POPTS_IXSM;
546         if (ol_flags & PKT_TX_TCP_SEG)
547                 tmp |= IXGBE_ADVTXD_POPTS_TXSM;
548         return tmp;
549 }
550
551 static inline uint32_t
552 tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
553 {
554         uint32_t cmdtype = 0;
555
556         if (ol_flags & PKT_TX_VLAN_PKT)
557                 cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
558         if (ol_flags & PKT_TX_TCP_SEG)
559                 cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
560         if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
561                 cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
562         if (ol_flags & PKT_TX_MACSEC)
563                 cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
564         return cmdtype;
565 }
566
567 /* Default RS bit threshold values */
568 #ifndef DEFAULT_TX_RS_THRESH
569 #define DEFAULT_TX_RS_THRESH   32
570 #endif
571 #ifndef DEFAULT_TX_FREE_THRESH
572 #define DEFAULT_TX_FREE_THRESH 32
573 #endif
574
575 /* Reset transmit descriptors after they have been used */
576 static inline int
577 ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
578 {
579         struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
580         volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
581         uint16_t last_desc_cleaned = txq->last_desc_cleaned;
582         uint16_t nb_tx_desc = txq->nb_tx_desc;
583         uint16_t desc_to_clean_to;
584         uint16_t nb_tx_to_clean;
585         uint32_t status;
586
587         /* Determine the last descriptor needing to be cleaned */
588         desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
589         if (desc_to_clean_to >= nb_tx_desc)
590                 desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
591
592         /* Check to make sure the last descriptor to clean is done */
593         desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
594         status = txr[desc_to_clean_to].wb.status;
595         if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
596                 PMD_TX_FREE_LOG(DEBUG,
597                                 "TX descriptor %4u is not done"
598                                 "(port=%d queue=%d)",
599                                 desc_to_clean_to,
600                                 txq->port_id, txq->queue_id);
601                 /* Failed to clean any descriptors, better luck next time */
602                 return -(1);
603         }
604
605         /* Figure out how many descriptors will be cleaned */
606         if (last_desc_cleaned > desc_to_clean_to)
607                 nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
608                                                         desc_to_clean_to);
609         else
610                 nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
611                                                 last_desc_cleaned);
612
613         PMD_TX_FREE_LOG(DEBUG,
614                         "Cleaning %4u TX descriptors: %4u to %4u "
615                         "(port=%d queue=%d)",
616                         nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
617                         txq->port_id, txq->queue_id);
618
619         /*
620          * The last descriptor to clean is done, so that means all the
621          * descriptors from the last descriptor that was cleaned
622          * up to the last descriptor with the RS bit set
623          * are done. Only reset the threshold descriptor.
624          */
625         txr[desc_to_clean_to].wb.status = 0;
626
627         /* Update the txq to reflect the last descriptor that was cleaned */
628         txq->last_desc_cleaned = desc_to_clean_to;
629         txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
630
631         /* No Error */
632         return 0;
633 }
634
635 uint16_t
636 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
637                 uint16_t nb_pkts)
638 {
639         struct ixgbe_tx_queue *txq;
640         struct ixgbe_tx_entry *sw_ring;
641         struct ixgbe_tx_entry *txe, *txn;
642         volatile union ixgbe_adv_tx_desc *txr;
643         volatile union ixgbe_adv_tx_desc *txd, *txp;
644         struct rte_mbuf     *tx_pkt;
645         struct rte_mbuf     *m_seg;
646         uint64_t buf_dma_addr;
647         uint32_t olinfo_status;
648         uint32_t cmd_type_len;
649         uint32_t pkt_len;
650         uint16_t slen;
651         uint64_t ol_flags;
652         uint16_t tx_id;
653         uint16_t tx_last;
654         uint16_t nb_tx;
655         uint16_t nb_used;
656         uint64_t tx_ol_req;
657         uint32_t ctx = 0;
658         uint32_t new_ctx;
659         union ixgbe_tx_offload tx_offload;
660
661         tx_offload.data[0] = 0;
662         tx_offload.data[1] = 0;
663         txq = tx_queue;
664         sw_ring = txq->sw_ring;
665         txr     = txq->tx_ring;
666         tx_id   = txq->tx_tail;
667         txe = &sw_ring[tx_id];
668         txp = NULL;
669
670         /* Determine if the descriptor ring needs to be cleaned. */
671         if (txq->nb_tx_free < txq->tx_free_thresh)
672                 ixgbe_xmit_cleanup(txq);
673
674         rte_prefetch0(&txe->mbuf->pool);
675
676         /* TX loop */
677         for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
678                 new_ctx = 0;
679                 tx_pkt = *tx_pkts++;
680                 pkt_len = tx_pkt->pkt_len;
681
682                 /*
683                  * Determine how many (if any) context descriptors
684                  * are needed for offload functionality.
685                  */
686                 ol_flags = tx_pkt->ol_flags;
687
688                 /* If hardware offload required */
689                 tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
690                 if (tx_ol_req) {
691                         tx_offload.l2_len = tx_pkt->l2_len;
692                         tx_offload.l3_len = tx_pkt->l3_len;
693                         tx_offload.l4_len = tx_pkt->l4_len;
694                         tx_offload.vlan_tci = tx_pkt->vlan_tci;
695                         tx_offload.tso_segsz = tx_pkt->tso_segsz;
696                         tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
697                         tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
698
699                         /* If new context need be built or reuse the exist ctx. */
700                         ctx = what_advctx_update(txq, tx_ol_req,
701                                 tx_offload);
702                         /* Only allocate context descriptor if required*/
703                         new_ctx = (ctx == IXGBE_CTX_NUM);
704                         ctx = txq->ctx_curr;
705                 }
706
707                 /*
708                  * Keep track of how many descriptors are used this loop
709                  * This will always be the number of segments + the number of
710                  * Context descriptors required to transmit the packet
711                  */
712                 nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
713
714                 if (txp != NULL &&
715                                 nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
716                         /* set RS on the previous packet in the burst */
717                         txp->read.cmd_type_len |=
718                                 rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
719
720                 /*
721                  * The number of descriptors that must be allocated for a
722                  * packet is the number of segments of that packet, plus 1
723                  * Context Descriptor for the hardware offload, if any.
724                  * Determine the last TX descriptor to allocate in the TX ring
725                  * for the packet, starting from the current position (tx_id)
726                  * in the ring.
727                  */
728                 tx_last = (uint16_t) (tx_id + nb_used - 1);
729
730                 /* Circular ring */
731                 if (tx_last >= txq->nb_tx_desc)
732                         tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
733
734                 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
735                            " tx_first=%u tx_last=%u",
736                            (unsigned) txq->port_id,
737                            (unsigned) txq->queue_id,
738                            (unsigned) pkt_len,
739                            (unsigned) tx_id,
740                            (unsigned) tx_last);
741
742                 /*
743                  * Make sure there are enough TX descriptors available to
744                  * transmit the entire packet.
745                  * nb_used better be less than or equal to txq->tx_rs_thresh
746                  */
747                 if (nb_used > txq->nb_tx_free) {
748                         PMD_TX_FREE_LOG(DEBUG,
749                                         "Not enough free TX descriptors "
750                                         "nb_used=%4u nb_free=%4u "
751                                         "(port=%d queue=%d)",
752                                         nb_used, txq->nb_tx_free,
753                                         txq->port_id, txq->queue_id);
754
755                         if (ixgbe_xmit_cleanup(txq) != 0) {
756                                 /* Could not clean any descriptors */
757                                 if (nb_tx == 0)
758                                         return 0;
759                                 goto end_of_tx;
760                         }
761
762                         /* nb_used better be <= txq->tx_rs_thresh */
763                         if (unlikely(nb_used > txq->tx_rs_thresh)) {
764                                 PMD_TX_FREE_LOG(DEBUG,
765                                         "The number of descriptors needed to "
766                                         "transmit the packet exceeds the "
767                                         "RS bit threshold. This will impact "
768                                         "performance."
769                                         "nb_used=%4u nb_free=%4u "
770                                         "tx_rs_thresh=%4u. "
771                                         "(port=%d queue=%d)",
772                                         nb_used, txq->nb_tx_free,
773                                         txq->tx_rs_thresh,
774                                         txq->port_id, txq->queue_id);
775                                 /*
776                                  * Loop here until there are enough TX
777                                  * descriptors or until the ring cannot be
778                                  * cleaned.
779                                  */
780                                 while (nb_used > txq->nb_tx_free) {
781                                         if (ixgbe_xmit_cleanup(txq) != 0) {
782                                                 /*
783                                                  * Could not clean any
784                                                  * descriptors
785                                                  */
786                                                 if (nb_tx == 0)
787                                                         return 0;
788                                                 goto end_of_tx;
789                                         }
790                                 }
791                         }
792                 }
793
794                 /*
795                  * By now there are enough free TX descriptors to transmit
796                  * the packet.
797                  */
798
799                 /*
800                  * Set common flags of all TX Data Descriptors.
801                  *
802                  * The following bits must be set in all Data Descriptors:
803                  *   - IXGBE_ADVTXD_DTYP_DATA
804                  *   - IXGBE_ADVTXD_DCMD_DEXT
805                  *
806                  * The following bits must be set in the first Data Descriptor
807                  * and are ignored in the other ones:
808                  *   - IXGBE_ADVTXD_DCMD_IFCS
809                  *   - IXGBE_ADVTXD_MAC_1588
810                  *   - IXGBE_ADVTXD_DCMD_VLE
811                  *
812                  * The following bits must only be set in the last Data
813                  * Descriptor:
814                  *   - IXGBE_TXD_CMD_EOP
815                  *
816                  * The following bits can be set in any Data Descriptor, but
817                  * are only set in the last Data Descriptor:
818                  *   - IXGBE_TXD_CMD_RS
819                  */
820                 cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
821                         IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
822
823 #ifdef RTE_LIBRTE_IEEE1588
824                 if (ol_flags & PKT_TX_IEEE1588_TMST)
825                         cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
826 #endif
827
828                 olinfo_status = 0;
829                 if (tx_ol_req) {
830
831                         if (ol_flags & PKT_TX_TCP_SEG) {
832                                 /* when TSO is on, paylen in descriptor is the
833                                  * not the packet len but the tcp payload len */
834                                 pkt_len -= (tx_offload.l2_len +
835                                         tx_offload.l3_len + tx_offload.l4_len);
836                         }
837
838                         /*
839                          * Setup the TX Advanced Context Descriptor if required
840                          */
841                         if (new_ctx) {
842                                 volatile struct ixgbe_adv_tx_context_desc *
843                                     ctx_txd;
844
845                                 ctx_txd = (volatile struct
846                                     ixgbe_adv_tx_context_desc *)
847                                     &txr[tx_id];
848
849                                 txn = &sw_ring[txe->next_id];
850                                 rte_prefetch0(&txn->mbuf->pool);
851
852                                 if (txe->mbuf != NULL) {
853                                         rte_pktmbuf_free_seg(txe->mbuf);
854                                         txe->mbuf = NULL;
855                                 }
856
857                                 ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
858                                         tx_offload);
859
860                                 txe->last_id = tx_last;
861                                 tx_id = txe->next_id;
862                                 txe = txn;
863                         }
864
865                         /*
866                          * Setup the TX Advanced Data Descriptor,
867                          * This path will go through
868                          * whatever new/reuse the context descriptor
869                          */
870                         cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
871                         olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
872                         olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
873                 }
874
875                 olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
876
877                 m_seg = tx_pkt;
878                 do {
879                         txd = &txr[tx_id];
880                         txn = &sw_ring[txe->next_id];
881                         rte_prefetch0(&txn->mbuf->pool);
882
883                         if (txe->mbuf != NULL)
884                                 rte_pktmbuf_free_seg(txe->mbuf);
885                         txe->mbuf = m_seg;
886
887                         /*
888                          * Set up Transmit Data Descriptor.
889                          */
890                         slen = m_seg->data_len;
891                         buf_dma_addr = rte_mbuf_data_dma_addr(m_seg);
892                         txd->read.buffer_addr =
893                                 rte_cpu_to_le_64(buf_dma_addr);
894                         txd->read.cmd_type_len =
895                                 rte_cpu_to_le_32(cmd_type_len | slen);
896                         txd->read.olinfo_status =
897                                 rte_cpu_to_le_32(olinfo_status);
898                         txe->last_id = tx_last;
899                         tx_id = txe->next_id;
900                         txe = txn;
901                         m_seg = m_seg->next;
902                 } while (m_seg != NULL);
903
904                 /*
905                  * The last packet data descriptor needs End Of Packet (EOP)
906                  */
907                 cmd_type_len |= IXGBE_TXD_CMD_EOP;
908                 txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
909                 txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
910
911                 /* Set RS bit only on threshold packets' last descriptor */
912                 if (txq->nb_tx_used >= txq->tx_rs_thresh) {
913                         PMD_TX_FREE_LOG(DEBUG,
914                                         "Setting RS bit on TXD id="
915                                         "%4u (port=%d queue=%d)",
916                                         tx_last, txq->port_id, txq->queue_id);
917
918                         cmd_type_len |= IXGBE_TXD_CMD_RS;
919
920                         /* Update txq RS bit counters */
921                         txq->nb_tx_used = 0;
922                         txp = NULL;
923                 } else
924                         txp = txd;
925
926                 txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
927         }
928
929 end_of_tx:
930         /* set RS on last packet in the burst */
931         if (txp != NULL)
932                 txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
933
934         rte_wmb();
935
936         /*
937          * Set the Transmit Descriptor Tail (TDT)
938          */
939         PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
940                    (unsigned) txq->port_id, (unsigned) txq->queue_id,
941                    (unsigned) tx_id, (unsigned) nb_tx);
942         IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
943         txq->tx_tail = tx_id;
944
945         return nb_tx;
946 }
947
948 /*********************************************************************
949  *
950  *  TX prep functions
951  *
952  **********************************************************************/
953 uint16_t
954 ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
955 {
956         int i, ret;
957         uint64_t ol_flags;
958         struct rte_mbuf *m;
959         struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
960
961         for (i = 0; i < nb_pkts; i++) {
962                 m = tx_pkts[i];
963                 ol_flags = m->ol_flags;
964
965                 /**
966                  * Check if packet meets requirements for number of segments
967                  *
968                  * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
969                  *       non-TSO
970                  */
971
972                 if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
973                         rte_errno = -EINVAL;
974                         return i;
975                 }
976
977                 if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
978                         rte_errno = -ENOTSUP;
979                         return i;
980                 }
981
982 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
983                 ret = rte_validate_tx_offload(m);
984                 if (ret != 0) {
985                         rte_errno = ret;
986                         return i;
987                 }
988 #endif
989                 ret = rte_net_intel_cksum_prepare(m);
990                 if (ret != 0) {
991                         rte_errno = ret;
992                         return i;
993                 }
994         }
995
996         return i;
997 }
998
999 /*********************************************************************
1000  *
1001  *  RX functions
1002  *
1003  **********************************************************************/
1004
1005 #define IXGBE_PACKET_TYPE_ETHER                         0X00
1006 #define IXGBE_PACKET_TYPE_IPV4                          0X01
1007 #define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1008 #define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1009 #define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1010 #define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1011 #define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1012 #define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1013 #define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1014 #define IXGBE_PACKET_TYPE_IPV6                          0X04
1015 #define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1016 #define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1017 #define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1018 #define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1019 #define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1020 #define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1021 #define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1022 #define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1023 #define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1024 #define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1025 #define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1026 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1027 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1028 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1029 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1030 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1031 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1032 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1033 #define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1034 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1035 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1036 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1037 #define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1038
1039 #define IXGBE_PACKET_TYPE_NVGRE                   0X00
1040 #define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1041 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1042 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1043 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1044 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1045 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1046 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1047 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1048 #define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1049 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1050 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1051 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1052 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1053 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1054 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1055 #define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1056 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1057 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1058 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1059 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1060 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1061 #define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1062
1063 #define IXGBE_PACKET_TYPE_VXLAN                   0X80
1064 #define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1065 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1066 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1067 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1068 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1069 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1070 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1071 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1072 #define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1073 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1074 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1075 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1076 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1077 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1078 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1079 #define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1080 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1081 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1082 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1083 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1084 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1085 #define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1086
1087 #define IXGBE_PACKET_TYPE_MAX               0X80
1088 #define IXGBE_PACKET_TYPE_TN_MAX            0X100
1089 #define IXGBE_PACKET_TYPE_SHIFT             0X04
1090
1091 /* @note: fix ixgbe_dev_supported_ptypes_get() if any change here. */
1092 static inline uint32_t
1093 ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1094 {
1095         /**
1096          * Use 2 different table for normal packet and tunnel packet
1097          * to save the space.
1098          */
1099         static const uint32_t
1100                 ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1101                 [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1102                 [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1103                         RTE_PTYPE_L3_IPV4,
1104                 [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1105                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1106                 [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1107                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1108                 [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1109                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1110                 [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1111                         RTE_PTYPE_L3_IPV4_EXT,
1112                 [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1113                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1114                 [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1115                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1116                 [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1117                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1118                 [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1119                         RTE_PTYPE_L3_IPV6,
1120                 [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1121                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1122                 [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1123                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1124                 [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1125                         RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1126                 [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1127                         RTE_PTYPE_L3_IPV6_EXT,
1128                 [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1129                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1130                 [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1131                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1132                 [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1133                         RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1134                 [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1135                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1136                         RTE_PTYPE_INNER_L3_IPV6,
1137                 [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1138                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1139                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1140                 [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1141                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1142                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1143                 [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1144                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1145                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1146                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1147                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1148                         RTE_PTYPE_INNER_L3_IPV6,
1149                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1150                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1151                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1152                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1153                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1154                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1155                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1156                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1157                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1158                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1159                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1160                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1161                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1162                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1163                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1164                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1165                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1166                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1167                 [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1168                         RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1169                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1170                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1171                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1172                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1173                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1174                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1175                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1176                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1177                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1178                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1179                 [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1180                         RTE_PTYPE_L2_ETHER |
1181                         RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1182                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1183         };
1184
1185         static const uint32_t
1186                 ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1187                 [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1188                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1189                         RTE_PTYPE_INNER_L2_ETHER,
1190                 [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1191                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1192                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1193                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1194                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1195                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1196                 [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1197                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1198                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1199                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1200                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1201                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1202                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1203                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1204                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1205                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1206                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1207                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1208                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1209                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1210                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1211                         RTE_PTYPE_INNER_L4_TCP,
1212                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1213                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1214                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1215                         RTE_PTYPE_INNER_L4_TCP,
1216                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1217                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1218                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1219                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1220                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1221                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1222                         RTE_PTYPE_INNER_L4_TCP,
1223                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1224                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1225                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1226                         RTE_PTYPE_INNER_L3_IPV4,
1227                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1228                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1229                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1230                         RTE_PTYPE_INNER_L4_UDP,
1231                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1232                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1233                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1234                         RTE_PTYPE_INNER_L4_UDP,
1235                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1236                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1237                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1238                         RTE_PTYPE_INNER_L4_SCTP,
1239                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1240                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1241                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1242                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1243                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1244                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1245                         RTE_PTYPE_INNER_L4_UDP,
1246                 [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1247                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1248                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1249                         RTE_PTYPE_INNER_L4_SCTP,
1250                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1251                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1252                         RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1253                         RTE_PTYPE_INNER_L3_IPV4,
1254                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1255                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1256                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1257                         RTE_PTYPE_INNER_L4_SCTP,
1258                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1259                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1260                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1261                         RTE_PTYPE_INNER_L4_SCTP,
1262                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1263                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1264                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1265                         RTE_PTYPE_INNER_L4_TCP,
1266                 [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1267                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1268                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1269                         RTE_PTYPE_INNER_L4_UDP,
1270
1271                 [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1272                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1273                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1274                 [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1275                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1276                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1277                         RTE_PTYPE_INNER_L3_IPV4,
1278                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1279                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1280                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1281                         RTE_PTYPE_INNER_L3_IPV4_EXT,
1282                 [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1283                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1284                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1285                         RTE_PTYPE_INNER_L3_IPV6,
1286                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1287                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1288                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1289                         RTE_PTYPE_INNER_L3_IPV4,
1290                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1291                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1292                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1293                         RTE_PTYPE_INNER_L3_IPV6_EXT,
1294                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1295                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                         RTE_PTYPE_INNER_L3_IPV4,
1298                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1299                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1302                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1303                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1306                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1307                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                         RTE_PTYPE_INNER_L3_IPV4,
1310                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1311                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1314                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1315                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1316                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1317                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1318                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1319                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1322                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1323                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1324                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1325                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1326                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1327                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                         RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1330                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                         RTE_PTYPE_INNER_L3_IPV4,
1334                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1335                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1338                 [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1339                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                         RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1342                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1343                         RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1344                         RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1345                         RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1346                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1347                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                         RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1350                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1351                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1352                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1353                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1354                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1355                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1358                 [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1359                         RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                         RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                         RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1362         };
1363
1364         if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1365                 return RTE_PTYPE_UNKNOWN;
1366
1367         pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1368
1369         /* For tunnel packet */
1370         if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1371                 /* Remove the tunnel bit to save the space. */
1372                 pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1373                 return ptype_table_tn[pkt_info];
1374         }
1375
1376         /**
1377          * For x550, if it's not tunnel,
1378          * tunnel type bit should be set to 0.
1379          * Reuse 82599's mask.
1380          */
1381         pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1382
1383         return ptype_table[pkt_info];
1384 }
1385
1386 static inline uint64_t
1387 ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1388 {
1389         static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1390                 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1391                 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1392                 PKT_RX_RSS_HASH, 0, 0, 0,
1393                 0, 0, 0,  PKT_RX_FDIR,
1394         };
1395 #ifdef RTE_LIBRTE_IEEE1588
1396         static uint64_t ip_pkt_etqf_map[8] = {
1397                 0, 0, 0, PKT_RX_IEEE1588_PTP,
1398                 0, 0, 0, 0,
1399         };
1400
1401         if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1402                 return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1403                                 ip_rss_types_map[pkt_info & 0XF];
1404         else
1405                 return ip_rss_types_map[pkt_info & 0XF];
1406 #else
1407         return ip_rss_types_map[pkt_info & 0XF];
1408 #endif
1409 }
1410
1411 static inline uint64_t
1412 rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1413 {
1414         uint64_t pkt_flags;
1415
1416         /*
1417          * Check if VLAN present only.
1418          * Do not check whether L3/L4 rx checksum done by NIC or not,
1419          * That can be found from rte_eth_rxmode.hw_ip_checksum flag
1420          */
1421         pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1422
1423 #ifdef RTE_LIBRTE_IEEE1588
1424         if (rx_status & IXGBE_RXD_STAT_TMST)
1425                 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1426 #endif
1427         return pkt_flags;
1428 }
1429
1430 static inline uint64_t
1431 rx_desc_error_to_pkt_flags(uint32_t rx_status)
1432 {
1433         uint64_t pkt_flags;
1434
1435         /*
1436          * Bit 31: IPE, IPv4 checksum error
1437          * Bit 30: L4I, L4I integrity error
1438          */
1439         static uint64_t error_to_pkt_flags_map[4] = {
1440                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1441                 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1442                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1443                 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1444         };
1445         pkt_flags = error_to_pkt_flags_map[(rx_status >>
1446                 IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1447
1448         if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1449             (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1450                 pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1451         }
1452
1453         return pkt_flags;
1454 }
1455
1456 /*
1457  * LOOK_AHEAD defines how many desc statuses to check beyond the
1458  * current descriptor.
1459  * It must be a pound define for optimal performance.
1460  * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1461  * function only works with LOOK_AHEAD=8.
1462  */
1463 #define LOOK_AHEAD 8
1464 #if (LOOK_AHEAD != 8)
1465 #error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1466 #endif
1467 static inline int
1468 ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1469 {
1470         volatile union ixgbe_adv_rx_desc *rxdp;
1471         struct ixgbe_rx_entry *rxep;
1472         struct rte_mbuf *mb;
1473         uint16_t pkt_len;
1474         uint64_t pkt_flags;
1475         int nb_dd;
1476         uint32_t s[LOOK_AHEAD];
1477         uint32_t pkt_info[LOOK_AHEAD];
1478         int i, j, nb_rx = 0;
1479         uint32_t status;
1480         uint64_t vlan_flags = rxq->vlan_flags;
1481
1482         /* get references to current descriptor and S/W ring entry */
1483         rxdp = &rxq->rx_ring[rxq->rx_tail];
1484         rxep = &rxq->sw_ring[rxq->rx_tail];
1485
1486         status = rxdp->wb.upper.status_error;
1487         /* check to make sure there is at least 1 packet to receive */
1488         if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1489                 return 0;
1490
1491         /*
1492          * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1493          * reference packets that are ready to be received.
1494          */
1495         for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1496              i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1497                 /* Read desc statuses backwards to avoid race condition */
1498                 for (j = 0; j < LOOK_AHEAD; j++)
1499                         s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1500
1501                 rte_smp_rmb();
1502
1503                 /* Compute how many status bits were set */
1504                 for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1505                                 (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1506                         ;
1507
1508                 for (j = 0; j < nb_dd; j++)
1509                         pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1510                                                        lo_dword.data);
1511
1512                 nb_rx += nb_dd;
1513
1514                 /* Translate descriptor info to mbuf format */
1515                 for (j = 0; j < nb_dd; ++j) {
1516                         mb = rxep[j].mbuf;
1517                         pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1518                                   rxq->crc_len;
1519                         mb->data_len = pkt_len;
1520                         mb->pkt_len = pkt_len;
1521                         mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1522
1523                         /* convert descriptor fields to rte mbuf flags */
1524                         pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1525                                 vlan_flags);
1526                         pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1527                         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1528                                         ((uint16_t)pkt_info[j]);
1529                         mb->ol_flags = pkt_flags;
1530                         mb->packet_type =
1531                                 ixgbe_rxd_pkt_info_to_pkt_type
1532                                         (pkt_info[j], rxq->pkt_type_mask);
1533
1534                         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1535                                 mb->hash.rss = rte_le_to_cpu_32(
1536                                     rxdp[j].wb.lower.hi_dword.rss);
1537                         else if (pkt_flags & PKT_RX_FDIR) {
1538                                 mb->hash.fdir.hash = rte_le_to_cpu_16(
1539                                     rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1540                                     IXGBE_ATR_HASH_MASK;
1541                                 mb->hash.fdir.id = rte_le_to_cpu_16(
1542                                     rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1543                         }
1544                 }
1545
1546                 /* Move mbuf pointers from the S/W ring to the stage */
1547                 for (j = 0; j < LOOK_AHEAD; ++j) {
1548                         rxq->rx_stage[i + j] = rxep[j].mbuf;
1549                 }
1550
1551                 /* stop if all requested packets could not be received */
1552                 if (nb_dd != LOOK_AHEAD)
1553                         break;
1554         }
1555
1556         /* clear software ring entries so we can cleanup correctly */
1557         for (i = 0; i < nb_rx; ++i) {
1558                 rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1559         }
1560
1561
1562         return nb_rx;
1563 }
1564
1565 static inline int
1566 ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1567 {
1568         volatile union ixgbe_adv_rx_desc *rxdp;
1569         struct ixgbe_rx_entry *rxep;
1570         struct rte_mbuf *mb;
1571         uint16_t alloc_idx;
1572         __le64 dma_addr;
1573         int diag, i;
1574
1575         /* allocate buffers in bulk directly into the S/W ring */
1576         alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1577         rxep = &rxq->sw_ring[alloc_idx];
1578         diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1579                                     rxq->rx_free_thresh);
1580         if (unlikely(diag != 0))
1581                 return -ENOMEM;
1582
1583         rxdp = &rxq->rx_ring[alloc_idx];
1584         for (i = 0; i < rxq->rx_free_thresh; ++i) {
1585                 /* populate the static rte mbuf fields */
1586                 mb = rxep[i].mbuf;
1587                 if (reset_mbuf) {
1588                         mb->port = rxq->port_id;
1589                 }
1590
1591                 rte_mbuf_refcnt_set(mb, 1);
1592                 mb->data_off = RTE_PKTMBUF_HEADROOM;
1593
1594                 /* populate the descriptors */
1595                 dma_addr = rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mb));
1596                 rxdp[i].read.hdr_addr = 0;
1597                 rxdp[i].read.pkt_addr = dma_addr;
1598         }
1599
1600         /* update state of internal queue structure */
1601         rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1602         if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1603                 rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1604
1605         /* no errors */
1606         return 0;
1607 }
1608
1609 static inline uint16_t
1610 ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1611                          uint16_t nb_pkts)
1612 {
1613         struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1614         int i;
1615
1616         /* how many packets are ready to return? */
1617         nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1618
1619         /* copy mbuf pointers to the application's packet list */
1620         for (i = 0; i < nb_pkts; ++i)
1621                 rx_pkts[i] = stage[i];
1622
1623         /* update internal queue state */
1624         rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1625         rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1626
1627         return nb_pkts;
1628 }
1629
1630 static inline uint16_t
1631 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1632              uint16_t nb_pkts)
1633 {
1634         struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1635         uint16_t nb_rx = 0;
1636
1637         /* Any previously recv'd pkts will be returned from the Rx stage */
1638         if (rxq->rx_nb_avail)
1639                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1640
1641         /* Scan the H/W ring for packets to receive */
1642         nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1643
1644         /* update internal queue state */
1645         rxq->rx_next_avail = 0;
1646         rxq->rx_nb_avail = nb_rx;
1647         rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1648
1649         /* if required, allocate new buffers to replenish descriptors */
1650         if (rxq->rx_tail > rxq->rx_free_trigger) {
1651                 uint16_t cur_free_trigger = rxq->rx_free_trigger;
1652
1653                 if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1654                         int i, j;
1655
1656                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1657                                    "queue_id=%u", (unsigned) rxq->port_id,
1658                                    (unsigned) rxq->queue_id);
1659
1660                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1661                                 rxq->rx_free_thresh;
1662
1663                         /*
1664                          * Need to rewind any previous receives if we cannot
1665                          * allocate new buffers to replenish the old ones.
1666                          */
1667                         rxq->rx_nb_avail = 0;
1668                         rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1669                         for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1670                                 rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1671
1672                         return 0;
1673                 }
1674
1675                 /* update tail pointer */
1676                 rte_wmb();
1677                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1678                                             cur_free_trigger);
1679         }
1680
1681         if (rxq->rx_tail >= rxq->nb_rx_desc)
1682                 rxq->rx_tail = 0;
1683
1684         /* received any packets this loop? */
1685         if (rxq->rx_nb_avail)
1686                 return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1687
1688         return 0;
1689 }
1690
1691 /* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1692 uint16_t
1693 ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1694                            uint16_t nb_pkts)
1695 {
1696         uint16_t nb_rx;
1697
1698         if (unlikely(nb_pkts == 0))
1699                 return 0;
1700
1701         if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1702                 return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1703
1704         /* request is relatively large, chunk it up */
1705         nb_rx = 0;
1706         while (nb_pkts) {
1707                 uint16_t ret, n;
1708
1709                 n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1710                 ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1711                 nb_rx = (uint16_t)(nb_rx + ret);
1712                 nb_pkts = (uint16_t)(nb_pkts - ret);
1713                 if (ret < n)
1714                         break;
1715         }
1716
1717         return nb_rx;
1718 }
1719
1720 uint16_t
1721 ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1722                 uint16_t nb_pkts)
1723 {
1724         struct ixgbe_rx_queue *rxq;
1725         volatile union ixgbe_adv_rx_desc *rx_ring;
1726         volatile union ixgbe_adv_rx_desc *rxdp;
1727         struct ixgbe_rx_entry *sw_ring;
1728         struct ixgbe_rx_entry *rxe;
1729         struct rte_mbuf *rxm;
1730         struct rte_mbuf *nmb;
1731         union ixgbe_adv_rx_desc rxd;
1732         uint64_t dma_addr;
1733         uint32_t staterr;
1734         uint32_t pkt_info;
1735         uint16_t pkt_len;
1736         uint16_t rx_id;
1737         uint16_t nb_rx;
1738         uint16_t nb_hold;
1739         uint64_t pkt_flags;
1740         uint64_t vlan_flags;
1741
1742         nb_rx = 0;
1743         nb_hold = 0;
1744         rxq = rx_queue;
1745         rx_id = rxq->rx_tail;
1746         rx_ring = rxq->rx_ring;
1747         sw_ring = rxq->sw_ring;
1748         vlan_flags = rxq->vlan_flags;
1749         while (nb_rx < nb_pkts) {
1750                 /*
1751                  * The order of operations here is important as the DD status
1752                  * bit must not be read after any other descriptor fields.
1753                  * rx_ring and rxdp are pointing to volatile data so the order
1754                  * of accesses cannot be reordered by the compiler. If they were
1755                  * not volatile, they could be reordered which could lead to
1756                  * using invalid descriptor fields when read from rxd.
1757                  */
1758                 rxdp = &rx_ring[rx_id];
1759                 staterr = rxdp->wb.upper.status_error;
1760                 if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1761                         break;
1762                 rxd = *rxdp;
1763
1764                 /*
1765                  * End of packet.
1766                  *
1767                  * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1768                  * is likely to be invalid and to be dropped by the various
1769                  * validation checks performed by the network stack.
1770                  *
1771                  * Allocate a new mbuf to replenish the RX ring descriptor.
1772                  * If the allocation fails:
1773                  *    - arrange for that RX descriptor to be the first one
1774                  *      being parsed the next time the receive function is
1775                  *      invoked [on the same queue].
1776                  *
1777                  *    - Stop parsing the RX ring and return immediately.
1778                  *
1779                  * This policy do not drop the packet received in the RX
1780                  * descriptor for which the allocation of a new mbuf failed.
1781                  * Thus, it allows that packet to be later retrieved if
1782                  * mbuf have been freed in the mean time.
1783                  * As a side effect, holding RX descriptors instead of
1784                  * systematically giving them back to the NIC may lead to
1785                  * RX ring exhaustion situations.
1786                  * However, the NIC can gracefully prevent such situations
1787                  * to happen by sending specific "back-pressure" flow control
1788                  * frames to its peer(s).
1789                  */
1790                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1791                            "ext_err_stat=0x%08x pkt_len=%u",
1792                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1793                            (unsigned) rx_id, (unsigned) staterr,
1794                            (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1795
1796                 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1797                 if (nmb == NULL) {
1798                         PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1799                                    "queue_id=%u", (unsigned) rxq->port_id,
1800                                    (unsigned) rxq->queue_id);
1801                         rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1802                         break;
1803                 }
1804
1805                 nb_hold++;
1806                 rxe = &sw_ring[rx_id];
1807                 rx_id++;
1808                 if (rx_id == rxq->nb_rx_desc)
1809                         rx_id = 0;
1810
1811                 /* Prefetch next mbuf while processing current one. */
1812                 rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1813
1814                 /*
1815                  * When next RX descriptor is on a cache-line boundary,
1816                  * prefetch the next 4 RX descriptors and the next 8 pointers
1817                  * to mbufs.
1818                  */
1819                 if ((rx_id & 0x3) == 0) {
1820                         rte_ixgbe_prefetch(&rx_ring[rx_id]);
1821                         rte_ixgbe_prefetch(&sw_ring[rx_id]);
1822                 }
1823
1824                 rxm = rxe->mbuf;
1825                 rxe->mbuf = nmb;
1826                 dma_addr =
1827                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
1828                 rxdp->read.hdr_addr = 0;
1829                 rxdp->read.pkt_addr = dma_addr;
1830
1831                 /*
1832                  * Initialize the returned mbuf.
1833                  * 1) setup generic mbuf fields:
1834                  *    - number of segments,
1835                  *    - next segment,
1836                  *    - packet length,
1837                  *    - RX port identifier.
1838                  * 2) integrate hardware offload data, if any:
1839                  *    - RSS flag & hash,
1840                  *    - IP checksum flag,
1841                  *    - VLAN TCI, if any,
1842                  *    - error flags.
1843                  */
1844                 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1845                                       rxq->crc_len);
1846                 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1847                 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1848                 rxm->nb_segs = 1;
1849                 rxm->next = NULL;
1850                 rxm->pkt_len = pkt_len;
1851                 rxm->data_len = pkt_len;
1852                 rxm->port = rxq->port_id;
1853
1854                 pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1855                 /* Only valid if PKT_RX_VLAN_PKT set in pkt_flags */
1856                 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1857
1858                 pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1859                 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1860                 pkt_flags = pkt_flags |
1861                         ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1862                 rxm->ol_flags = pkt_flags;
1863                 rxm->packet_type =
1864                         ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1865                                                        rxq->pkt_type_mask);
1866
1867                 if (likely(pkt_flags & PKT_RX_RSS_HASH))
1868                         rxm->hash.rss = rte_le_to_cpu_32(
1869                                                 rxd.wb.lower.hi_dword.rss);
1870                 else if (pkt_flags & PKT_RX_FDIR) {
1871                         rxm->hash.fdir.hash = rte_le_to_cpu_16(
1872                                         rxd.wb.lower.hi_dword.csum_ip.csum) &
1873                                         IXGBE_ATR_HASH_MASK;
1874                         rxm->hash.fdir.id = rte_le_to_cpu_16(
1875                                         rxd.wb.lower.hi_dword.csum_ip.ip_id);
1876                 }
1877                 /*
1878                  * Store the mbuf address into the next entry of the array
1879                  * of returned packets.
1880                  */
1881                 rx_pkts[nb_rx++] = rxm;
1882         }
1883         rxq->rx_tail = rx_id;
1884
1885         /*
1886          * If the number of free RX descriptors is greater than the RX free
1887          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1888          * register.
1889          * Update the RDT with the value of the last processed RX descriptor
1890          * minus 1, to guarantee that the RDT register is never equal to the
1891          * RDH register, which creates a "full" ring situtation from the
1892          * hardware point of view...
1893          */
1894         nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1895         if (nb_hold > rxq->rx_free_thresh) {
1896                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1897                            "nb_hold=%u nb_rx=%u",
1898                            (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1899                            (unsigned) rx_id, (unsigned) nb_hold,
1900                            (unsigned) nb_rx);
1901                 rx_id = (uint16_t) ((rx_id == 0) ?
1902                                      (rxq->nb_rx_desc - 1) : (rx_id - 1));
1903                 IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1904                 nb_hold = 0;
1905         }
1906         rxq->nb_rx_hold = nb_hold;
1907         return nb_rx;
1908 }
1909
1910 /**
1911  * Detect an RSC descriptor.
1912  */
1913 static inline uint32_t
1914 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1915 {
1916         return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1917                 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1918 }
1919
1920 /**
1921  * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1922  *
1923  * Fill the following info in the HEAD buffer of the Rx cluster:
1924  *    - RX port identifier
1925  *    - hardware offload data, if any:
1926  *      - RSS flag & hash
1927  *      - IP checksum flag
1928  *      - VLAN TCI, if any
1929  *      - error flags
1930  * @head HEAD of the packet cluster
1931  * @desc HW descriptor to get data from
1932  * @rxq Pointer to the Rx queue
1933  */
1934 static inline void
1935 ixgbe_fill_cluster_head_buf(
1936         struct rte_mbuf *head,
1937         union ixgbe_adv_rx_desc *desc,
1938         struct ixgbe_rx_queue *rxq,
1939         uint32_t staterr)
1940 {
1941         uint32_t pkt_info;
1942         uint64_t pkt_flags;
1943
1944         head->port = rxq->port_id;
1945
1946         /* The vlan_tci field is only valid when PKT_RX_VLAN_PKT is
1947          * set in the pkt_flags field.
1948          */
1949         head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1950         pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1951         pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1952         pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1953         pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1954         head->ol_flags = pkt_flags;
1955         head->packet_type =
1956                 ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1957
1958         if (likely(pkt_flags & PKT_RX_RSS_HASH))
1959                 head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1960         else if (pkt_flags & PKT_RX_FDIR) {
1961                 head->hash.fdir.hash =
1962                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1963                                                           & IXGBE_ATR_HASH_MASK;
1964                 head->hash.fdir.id =
1965                         rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1966         }
1967 }
1968
1969 /**
1970  * ixgbe_recv_pkts_lro - receive handler for and LRO case.
1971  *
1972  * @rx_queue Rx queue handle
1973  * @rx_pkts table of received packets
1974  * @nb_pkts size of rx_pkts table
1975  * @bulk_alloc if TRUE bulk allocation is used for a HW ring refilling
1976  *
1977  * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1978  * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1979  *
1980  * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1981  * 1) When non-EOP RSC completion arrives:
1982  *    a) Update the HEAD of the current RSC aggregation cluster with the new
1983  *       segment's data length.
1984  *    b) Set the "next" pointer of the current segment to point to the segment
1985  *       at the NEXTP index.
1986  *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
1987  *       in the sw_rsc_ring.
1988  * 2) When EOP arrives we just update the cluster's total length and offload
1989  *    flags and deliver the cluster up to the upper layers. In our case - put it
1990  *    in the rx_pkts table.
1991  *
1992  * Returns the number of received packets/clusters (according to the "bulk
1993  * receive" interface).
1994  */
1995 static inline uint16_t
1996 ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
1997                     bool bulk_alloc)
1998 {
1999         struct ixgbe_rx_queue *rxq = rx_queue;
2000         volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2001         struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2002         struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2003         uint16_t rx_id = rxq->rx_tail;
2004         uint16_t nb_rx = 0;
2005         uint16_t nb_hold = rxq->nb_rx_hold;
2006         uint16_t prev_id = rxq->rx_tail;
2007
2008         while (nb_rx < nb_pkts) {
2009                 bool eop;
2010                 struct ixgbe_rx_entry *rxe;
2011                 struct ixgbe_scattered_rx_entry *sc_entry;
2012                 struct ixgbe_scattered_rx_entry *next_sc_entry;
2013                 struct ixgbe_rx_entry *next_rxe = NULL;
2014                 struct rte_mbuf *first_seg;
2015                 struct rte_mbuf *rxm;
2016                 struct rte_mbuf *nmb;
2017                 union ixgbe_adv_rx_desc rxd;
2018                 uint16_t data_len;
2019                 uint16_t next_id;
2020                 volatile union ixgbe_adv_rx_desc *rxdp;
2021                 uint32_t staterr;
2022
2023 next_desc:
2024                 /*
2025                  * The code in this whole file uses the volatile pointer to
2026                  * ensure the read ordering of the status and the rest of the
2027                  * descriptor fields (on the compiler level only!!!). This is so
2028                  * UGLY - why not to just use the compiler barrier instead? DPDK
2029                  * even has the rte_compiler_barrier() for that.
2030                  *
2031                  * But most importantly this is just wrong because this doesn't
2032                  * ensure memory ordering in a general case at all. For
2033                  * instance, DPDK is supposed to work on Power CPUs where
2034                  * compiler barrier may just not be enough!
2035                  *
2036                  * I tried to write only this function properly to have a
2037                  * starting point (as a part of an LRO/RSC series) but the
2038                  * compiler cursed at me when I tried to cast away the
2039                  * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2040                  * keeping it the way it is for now.
2041                  *
2042                  * The code in this file is broken in so many other places and
2043                  * will just not work on a big endian CPU anyway therefore the
2044                  * lines below will have to be revisited together with the rest
2045                  * of the ixgbe PMD.
2046                  *
2047                  * TODO:
2048                  *    - Get rid of "volatile" crap and let the compiler do its
2049                  *      job.
2050                  *    - Use the proper memory barrier (rte_rmb()) to ensure the
2051                  *      memory ordering below.
2052                  */
2053                 rxdp = &rx_ring[rx_id];
2054                 staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2055
2056                 if (!(staterr & IXGBE_RXDADV_STAT_DD))
2057                         break;
2058
2059                 rxd = *rxdp;
2060
2061                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2062                                   "staterr=0x%x data_len=%u",
2063                            rxq->port_id, rxq->queue_id, rx_id, staterr,
2064                            rte_le_to_cpu_16(rxd.wb.upper.length));
2065
2066                 if (!bulk_alloc) {
2067                         nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2068                         if (nmb == NULL) {
2069                                 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2070                                                   "port_id=%u queue_id=%u",
2071                                            rxq->port_id, rxq->queue_id);
2072
2073                                 rte_eth_devices[rxq->port_id].data->
2074                                                         rx_mbuf_alloc_failed++;
2075                                 break;
2076                         }
2077                 } else if (nb_hold > rxq->rx_free_thresh) {
2078                         uint16_t next_rdt = rxq->rx_free_trigger;
2079
2080                         if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2081                                 rte_wmb();
2082                                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2083                                                             next_rdt);
2084                                 nb_hold -= rxq->rx_free_thresh;
2085                         } else {
2086                                 PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2087                                                   "port_id=%u queue_id=%u",
2088                                            rxq->port_id, rxq->queue_id);
2089
2090                                 rte_eth_devices[rxq->port_id].data->
2091                                                         rx_mbuf_alloc_failed++;
2092                                 break;
2093                         }
2094                 }
2095
2096                 nb_hold++;
2097                 rxe = &sw_ring[rx_id];
2098                 eop = staterr & IXGBE_RXDADV_STAT_EOP;
2099
2100                 next_id = rx_id + 1;
2101                 if (next_id == rxq->nb_rx_desc)
2102                         next_id = 0;
2103
2104                 /* Prefetch next mbuf while processing current one. */
2105                 rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2106
2107                 /*
2108                  * When next RX descriptor is on a cache-line boundary,
2109                  * prefetch the next 4 RX descriptors and the next 4 pointers
2110                  * to mbufs.
2111                  */
2112                 if ((next_id & 0x3) == 0) {
2113                         rte_ixgbe_prefetch(&rx_ring[next_id]);
2114                         rte_ixgbe_prefetch(&sw_ring[next_id]);
2115                 }
2116
2117                 rxm = rxe->mbuf;
2118
2119                 if (!bulk_alloc) {
2120                         __le64 dma =
2121                           rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(nmb));
2122                         /*
2123                          * Update RX descriptor with the physical address of the
2124                          * new data buffer of the new allocated mbuf.
2125                          */
2126                         rxe->mbuf = nmb;
2127
2128                         rxm->data_off = RTE_PKTMBUF_HEADROOM;
2129                         rxdp->read.hdr_addr = 0;
2130                         rxdp->read.pkt_addr = dma;
2131                 } else
2132                         rxe->mbuf = NULL;
2133
2134                 /*
2135                  * Set data length & data buffer address of mbuf.
2136                  */
2137                 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2138                 rxm->data_len = data_len;
2139
2140                 if (!eop) {
2141                         uint16_t nextp_id;
2142                         /*
2143                          * Get next descriptor index:
2144                          *  - For RSC it's in the NEXTP field.
2145                          *  - For a scattered packet - it's just a following
2146                          *    descriptor.
2147                          */
2148                         if (ixgbe_rsc_count(&rxd))
2149                                 nextp_id =
2150                                         (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2151                                                        IXGBE_RXDADV_NEXTP_SHIFT;
2152                         else
2153                                 nextp_id = next_id;
2154
2155                         next_sc_entry = &sw_sc_ring[nextp_id];
2156                         next_rxe = &sw_ring[nextp_id];
2157                         rte_ixgbe_prefetch(next_rxe);
2158                 }
2159
2160                 sc_entry = &sw_sc_ring[rx_id];
2161                 first_seg = sc_entry->fbuf;
2162                 sc_entry->fbuf = NULL;
2163
2164                 /*
2165                  * If this is the first buffer of the received packet,
2166                  * set the pointer to the first mbuf of the packet and
2167                  * initialize its context.
2168                  * Otherwise, update the total length and the number of segments
2169                  * of the current scattered packet, and update the pointer to
2170                  * the last mbuf of the current packet.
2171                  */
2172                 if (first_seg == NULL) {
2173                         first_seg = rxm;
2174                         first_seg->pkt_len = data_len;
2175                         first_seg->nb_segs = 1;
2176                 } else {
2177                         first_seg->pkt_len += data_len;
2178                         first_seg->nb_segs++;
2179                 }
2180
2181                 prev_id = rx_id;
2182                 rx_id = next_id;
2183
2184                 /*
2185                  * If this is not the last buffer of the received packet, update
2186                  * the pointer to the first mbuf at the NEXTP entry in the
2187                  * sw_sc_ring and continue to parse the RX ring.
2188                  */
2189                 if (!eop && next_rxe) {
2190                         rxm->next = next_rxe->mbuf;
2191                         next_sc_entry->fbuf = first_seg;
2192                         goto next_desc;
2193                 }
2194
2195                 /* Initialize the first mbuf of the returned packet */
2196                 ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2197
2198                 /*
2199                  * Deal with the case, when HW CRC srip is disabled.
2200                  * That can't happen when LRO is enabled, but still could
2201                  * happen for scattered RX mode.
2202                  */
2203                 first_seg->pkt_len -= rxq->crc_len;
2204                 if (unlikely(rxm->data_len <= rxq->crc_len)) {
2205                         struct rte_mbuf *lp;
2206
2207                         for (lp = first_seg; lp->next != rxm; lp = lp->next)
2208                                 ;
2209
2210                         first_seg->nb_segs--;
2211                         lp->data_len -= rxq->crc_len - rxm->data_len;
2212                         lp->next = NULL;
2213                         rte_pktmbuf_free_seg(rxm);
2214                 } else
2215                         rxm->data_len -= rxq->crc_len;
2216
2217                 /* Prefetch data of first segment, if configured to do so. */
2218                 rte_packet_prefetch((char *)first_seg->buf_addr +
2219                         first_seg->data_off);
2220
2221                 /*
2222                  * Store the mbuf address into the next entry of the array
2223                  * of returned packets.
2224                  */
2225                 rx_pkts[nb_rx++] = first_seg;
2226         }
2227
2228         /*
2229          * Record index of the next RX descriptor to probe.
2230          */
2231         rxq->rx_tail = rx_id;
2232
2233         /*
2234          * If the number of free RX descriptors is greater than the RX free
2235          * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2236          * register.
2237          * Update the RDT with the value of the last processed RX descriptor
2238          * minus 1, to guarantee that the RDT register is never equal to the
2239          * RDH register, which creates a "full" ring situtation from the
2240          * hardware point of view...
2241          */
2242         if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2243                 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2244                            "nb_hold=%u nb_rx=%u",
2245                            rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2246
2247                 rte_wmb();
2248                 IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2249                 nb_hold = 0;
2250         }
2251
2252         rxq->nb_rx_hold = nb_hold;
2253         return nb_rx;
2254 }
2255
2256 uint16_t
2257 ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2258                                  uint16_t nb_pkts)
2259 {
2260         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2261 }
2262
2263 uint16_t
2264 ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2265                                uint16_t nb_pkts)
2266 {
2267         return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2268 }
2269
2270 /*********************************************************************
2271  *
2272  *  Queue management functions
2273  *
2274  **********************************************************************/
2275
2276 static void __attribute__((cold))
2277 ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2278 {
2279         unsigned i;
2280
2281         if (txq->sw_ring != NULL) {
2282                 for (i = 0; i < txq->nb_tx_desc; i++) {
2283                         if (txq->sw_ring[i].mbuf != NULL) {
2284                                 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2285                                 txq->sw_ring[i].mbuf = NULL;
2286                         }
2287                 }
2288         }
2289 }
2290
2291 static void __attribute__((cold))
2292 ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2293 {
2294         if (txq != NULL &&
2295             txq->sw_ring != NULL)
2296                 rte_free(txq->sw_ring);
2297 }
2298
2299 static void __attribute__((cold))
2300 ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2301 {
2302         if (txq != NULL && txq->ops != NULL) {
2303                 txq->ops->release_mbufs(txq);
2304                 txq->ops->free_swring(txq);
2305                 rte_free(txq);
2306         }
2307 }
2308
2309 void __attribute__((cold))
2310 ixgbe_dev_tx_queue_release(void *txq)
2311 {
2312         ixgbe_tx_queue_release(txq);
2313 }
2314
2315 /* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2316 static void __attribute__((cold))
2317 ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2318 {
2319         static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2320         struct ixgbe_tx_entry *txe = txq->sw_ring;
2321         uint16_t prev, i;
2322
2323         /* Zero out HW ring memory */
2324         for (i = 0; i < txq->nb_tx_desc; i++) {
2325                 txq->tx_ring[i] = zeroed_desc;
2326         }
2327
2328         /* Initialize SW ring entries */
2329         prev = (uint16_t) (txq->nb_tx_desc - 1);
2330         for (i = 0; i < txq->nb_tx_desc; i++) {
2331                 volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2332
2333                 txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2334                 txe[i].mbuf = NULL;
2335                 txe[i].last_id = i;
2336                 txe[prev].next_id = i;
2337                 prev = i;
2338         }
2339
2340         txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2341         txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2342
2343         txq->tx_tail = 0;
2344         txq->nb_tx_used = 0;
2345         /*
2346          * Always allow 1 descriptor to be un-allocated to avoid
2347          * a H/W race condition
2348          */
2349         txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2350         txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2351         txq->ctx_curr = 0;
2352         memset((void *)&txq->ctx_cache, 0,
2353                 IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2354 }
2355
2356 static const struct ixgbe_txq_ops def_txq_ops = {
2357         .release_mbufs = ixgbe_tx_queue_release_mbufs,
2358         .free_swring = ixgbe_tx_free_swring,
2359         .reset = ixgbe_reset_tx_queue,
2360 };
2361
2362 /* Takes an ethdev and a queue and sets up the tx function to be used based on
2363  * the queue parameters. Used in tx_queue_setup by primary process and then
2364  * in dev_init by secondary process when attaching to an existing ethdev.
2365  */
2366 void __attribute__((cold))
2367 ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2368 {
2369         /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2370         if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS)
2371                         && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2372                 PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2373                 dev->tx_pkt_prepare = NULL;
2374 #ifdef RTE_IXGBE_INC_VECTOR
2375                 if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2376                                 (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2377                                         ixgbe_txq_vec_setup(txq) == 0)) {
2378                         PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2379                         dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2380                 } else
2381 #endif
2382                 dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2383         } else {
2384                 PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2385                 PMD_INIT_LOG(DEBUG,
2386                                 " - txq_flags = %lx " "[IXGBE_SIMPLE_FLAGS=%lx]",
2387                                 (unsigned long)txq->txq_flags,
2388                                 (unsigned long)IXGBE_SIMPLE_FLAGS);
2389                 PMD_INIT_LOG(DEBUG,
2390                                 " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2391                                 (unsigned long)txq->tx_rs_thresh,
2392                                 (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2393                 dev->tx_pkt_burst = ixgbe_xmit_pkts;
2394                 dev->tx_pkt_prepare = ixgbe_prep_pkts;
2395         }
2396 }
2397
2398 int __attribute__((cold))
2399 ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2400                          uint16_t queue_idx,
2401                          uint16_t nb_desc,
2402                          unsigned int socket_id,
2403                          const struct rte_eth_txconf *tx_conf)
2404 {
2405         const struct rte_memzone *tz;
2406         struct ixgbe_tx_queue *txq;
2407         struct ixgbe_hw     *hw;
2408         uint16_t tx_rs_thresh, tx_free_thresh;
2409
2410         PMD_INIT_FUNC_TRACE();
2411         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2412
2413         /*
2414          * Validate number of transmit descriptors.
2415          * It must not exceed hardware maximum, and must be multiple
2416          * of IXGBE_ALIGN.
2417          */
2418         if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2419                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2420                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2421                 return -EINVAL;
2422         }
2423
2424         /*
2425          * The following two parameters control the setting of the RS bit on
2426          * transmit descriptors.
2427          * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2428          * descriptors have been used.
2429          * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2430          * descriptors are used or if the number of descriptors required
2431          * to transmit a packet is greater than the number of free TX
2432          * descriptors.
2433          * The following constraints must be satisfied:
2434          *  tx_rs_thresh must be greater than 0.
2435          *  tx_rs_thresh must be less than the size of the ring minus 2.
2436          *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2437          *  tx_rs_thresh must be a divisor of the ring size.
2438          *  tx_free_thresh must be greater than 0.
2439          *  tx_free_thresh must be less than the size of the ring minus 3.
2440          * One descriptor in the TX ring is used as a sentinel to avoid a
2441          * H/W race condition, hence the maximum threshold constraints.
2442          * When set to zero use default values.
2443          */
2444         tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
2445                         tx_conf->tx_rs_thresh : DEFAULT_TX_RS_THRESH);
2446         tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2447                         tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2448         if (tx_rs_thresh >= (nb_desc - 2)) {
2449                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2450                         "of TX descriptors minus 2. (tx_rs_thresh=%u "
2451                         "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2452                         (int)dev->data->port_id, (int)queue_idx);
2453                 return -(EINVAL);
2454         }
2455         if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2456                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less or equal than %u. "
2457                         "(tx_rs_thresh=%u port=%d queue=%d)",
2458                         DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2459                         (int)dev->data->port_id, (int)queue_idx);
2460                 return -(EINVAL);
2461         }
2462         if (tx_free_thresh >= (nb_desc - 3)) {
2463                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2464                              "tx_free_thresh must be less than the number of "
2465                              "TX descriptors minus 3. (tx_free_thresh=%u "
2466                              "port=%d queue=%d)",
2467                              (unsigned int)tx_free_thresh,
2468                              (int)dev->data->port_id, (int)queue_idx);
2469                 return -(EINVAL);
2470         }
2471         if (tx_rs_thresh > tx_free_thresh) {
2472                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2473                              "tx_free_thresh. (tx_free_thresh=%u "
2474                              "tx_rs_thresh=%u port=%d queue=%d)",
2475                              (unsigned int)tx_free_thresh,
2476                              (unsigned int)tx_rs_thresh,
2477                              (int)dev->data->port_id,
2478                              (int)queue_idx);
2479                 return -(EINVAL);
2480         }
2481         if ((nb_desc % tx_rs_thresh) != 0) {
2482                 PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2483                              "number of TX descriptors. (tx_rs_thresh=%u "
2484                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2485                              (int)dev->data->port_id, (int)queue_idx);
2486                 return -(EINVAL);
2487         }
2488
2489         /*
2490          * If rs_bit_thresh is greater than 1, then TX WTHRESH should be
2491          * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2492          * by the NIC and all descriptors are written back after the NIC
2493          * accumulates WTHRESH descriptors.
2494          */
2495         if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2496                 PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2497                              "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2498                              "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2499                              (int)dev->data->port_id, (int)queue_idx);
2500                 return -(EINVAL);
2501         }
2502
2503         /* Free memory prior to re-allocation if needed... */
2504         if (dev->data->tx_queues[queue_idx] != NULL) {
2505                 ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2506                 dev->data->tx_queues[queue_idx] = NULL;
2507         }
2508
2509         /* First allocate the tx queue data structure */
2510         txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2511                                  RTE_CACHE_LINE_SIZE, socket_id);
2512         if (txq == NULL)
2513                 return -ENOMEM;
2514
2515         /*
2516          * Allocate TX ring hardware descriptors. A memzone large enough to
2517          * handle the maximum ring size is allocated in order to allow for
2518          * resizing in later calls to the queue setup function.
2519          */
2520         tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2521                         sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2522                         IXGBE_ALIGN, socket_id);
2523         if (tz == NULL) {
2524                 ixgbe_tx_queue_release(txq);
2525                 return -ENOMEM;
2526         }
2527
2528         txq->nb_tx_desc = nb_desc;
2529         txq->tx_rs_thresh = tx_rs_thresh;
2530         txq->tx_free_thresh = tx_free_thresh;
2531         txq->pthresh = tx_conf->tx_thresh.pthresh;
2532         txq->hthresh = tx_conf->tx_thresh.hthresh;
2533         txq->wthresh = tx_conf->tx_thresh.wthresh;
2534         txq->queue_id = queue_idx;
2535         txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2536                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2537         txq->port_id = dev->data->port_id;
2538         txq->txq_flags = tx_conf->txq_flags;
2539         txq->ops = &def_txq_ops;
2540         txq->tx_deferred_start = tx_conf->tx_deferred_start;
2541
2542         /*
2543          * Modification to set VFTDT for virtual function if vf is detected
2544          */
2545         if (hw->mac.type == ixgbe_mac_82599_vf ||
2546             hw->mac.type == ixgbe_mac_X540_vf ||
2547             hw->mac.type == ixgbe_mac_X550_vf ||
2548             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2549             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2550                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2551         else
2552                 txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2553
2554         txq->tx_ring_phys_addr = rte_mem_phy2mch(tz->memseg_id, tz->phys_addr);
2555         txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2556
2557         /* Allocate software ring */
2558         txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2559                                 sizeof(struct ixgbe_tx_entry) * nb_desc,
2560                                 RTE_CACHE_LINE_SIZE, socket_id);
2561         if (txq->sw_ring == NULL) {
2562                 ixgbe_tx_queue_release(txq);
2563                 return -ENOMEM;
2564         }
2565         PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2566                      txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2567
2568         /* set up vector or scalar TX function as appropriate */
2569         ixgbe_set_tx_function(dev, txq);
2570
2571         txq->ops->reset(txq);
2572
2573         dev->data->tx_queues[queue_idx] = txq;
2574
2575
2576         return 0;
2577 }
2578
2579 /**
2580  * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2581  *
2582  * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2583  * in the sw_rsc_ring is not set to NULL but rather points to the next
2584  * mbuf of this RSC aggregation (that has not been completed yet and still
2585  * resides on the HW ring). So, instead of calling for rte_pktmbuf_free() we
2586  * will just free first "nb_segs" segments of the cluster explicitly by calling
2587  * an rte_pktmbuf_free_seg().
2588  *
2589  * @m scattered cluster head
2590  */
2591 static void __attribute__((cold))
2592 ixgbe_free_sc_cluster(struct rte_mbuf *m)
2593 {
2594         uint8_t i, nb_segs = m->nb_segs;
2595         struct rte_mbuf *next_seg;
2596
2597         for (i = 0; i < nb_segs; i++) {
2598                 next_seg = m->next;
2599                 rte_pktmbuf_free_seg(m);
2600                 m = next_seg;
2601         }
2602 }
2603
2604 static void __attribute__((cold))
2605 ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2606 {
2607         unsigned i;
2608
2609 #ifdef RTE_IXGBE_INC_VECTOR
2610         /* SSE Vector driver has a different way of releasing mbufs. */
2611         if (rxq->rx_using_sse) {
2612                 ixgbe_rx_queue_release_mbufs_vec(rxq);
2613                 return;
2614         }
2615 #endif
2616
2617         if (rxq->sw_ring != NULL) {
2618                 for (i = 0; i < rxq->nb_rx_desc; i++) {
2619                         if (rxq->sw_ring[i].mbuf != NULL) {
2620                                 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2621                                 rxq->sw_ring[i].mbuf = NULL;
2622                         }
2623                 }
2624                 if (rxq->rx_nb_avail) {
2625                         for (i = 0; i < rxq->rx_nb_avail; ++i) {
2626                                 struct rte_mbuf *mb;
2627
2628                                 mb = rxq->rx_stage[rxq->rx_next_avail + i];
2629                                 rte_pktmbuf_free_seg(mb);
2630                         }
2631                         rxq->rx_nb_avail = 0;
2632                 }
2633         }
2634
2635         if (rxq->sw_sc_ring)
2636                 for (i = 0; i < rxq->nb_rx_desc; i++)
2637                         if (rxq->sw_sc_ring[i].fbuf) {
2638                                 ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2639                                 rxq->sw_sc_ring[i].fbuf = NULL;
2640                         }
2641 }
2642
2643 static void __attribute__((cold))
2644 ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2645 {
2646         if (rxq != NULL) {
2647                 ixgbe_rx_queue_release_mbufs(rxq);
2648                 rte_free(rxq->sw_ring);
2649                 rte_free(rxq->sw_sc_ring);
2650                 rte_free(rxq);
2651         }
2652 }
2653
2654 void __attribute__((cold))
2655 ixgbe_dev_rx_queue_release(void *rxq)
2656 {
2657         ixgbe_rx_queue_release(rxq);
2658 }
2659
2660 /*
2661  * Check if Rx Burst Bulk Alloc function can be used.
2662  * Return
2663  *        0: the preconditions are satisfied and the bulk allocation function
2664  *           can be used.
2665  *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2666  *           function must be used.
2667  */
2668 static inline int __attribute__((cold))
2669 check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2670 {
2671         int ret = 0;
2672
2673         /*
2674          * Make sure the following pre-conditions are satisfied:
2675          *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2676          *   rxq->rx_free_thresh < rxq->nb_rx_desc
2677          *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2678          * Scattered packets are not supported.  This should be checked
2679          * outside of this function.
2680          */
2681         if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2682                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2683                              "rxq->rx_free_thresh=%d, "
2684                              "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2685                              rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2686                 ret = -EINVAL;
2687         } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2688                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2689                              "rxq->rx_free_thresh=%d, "
2690                              "rxq->nb_rx_desc=%d",
2691                              rxq->rx_free_thresh, rxq->nb_rx_desc);
2692                 ret = -EINVAL;
2693         } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2694                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2695                              "rxq->nb_rx_desc=%d, "
2696                              "rxq->rx_free_thresh=%d",
2697                              rxq->nb_rx_desc, rxq->rx_free_thresh);
2698                 ret = -EINVAL;
2699         }
2700
2701         return ret;
2702 }
2703
2704 /* Reset dynamic ixgbe_rx_queue fields back to defaults */
2705 static void __attribute__((cold))
2706 ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2707 {
2708         static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2709         unsigned i;
2710         uint16_t len = rxq->nb_rx_desc;
2711
2712         /*
2713          * By default, the Rx queue setup function allocates enough memory for
2714          * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2715          * extra memory at the end of the descriptor ring to be zero'd out.
2716          */
2717         if (adapter->rx_bulk_alloc_allowed)
2718                 /* zero out extra memory */
2719                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2720
2721         /*
2722          * Zero out HW ring memory. Zero out extra memory at the end of
2723          * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2724          * reads extra memory as zeros.
2725          */
2726         for (i = 0; i < len; i++) {
2727                 rxq->rx_ring[i] = zeroed_desc;
2728         }
2729
2730         /*
2731          * initialize extra software ring entries. Space for these extra
2732          * entries is always allocated
2733          */
2734         memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2735         for (i = rxq->nb_rx_desc; i < len; ++i) {
2736                 rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2737         }
2738
2739         rxq->rx_nb_avail = 0;
2740         rxq->rx_next_avail = 0;
2741         rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2742         rxq->rx_tail = 0;
2743         rxq->nb_rx_hold = 0;
2744         rxq->pkt_first_seg = NULL;
2745         rxq->pkt_last_seg = NULL;
2746
2747 #ifdef RTE_IXGBE_INC_VECTOR
2748         rxq->rxrearm_start = 0;
2749         rxq->rxrearm_nb = 0;
2750 #endif
2751 }
2752
2753 int __attribute__((cold))
2754 ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
2755                          uint16_t queue_idx,
2756                          uint16_t nb_desc,
2757                          unsigned int socket_id,
2758                          const struct rte_eth_rxconf *rx_conf,
2759                          struct rte_mempool *mp)
2760 {
2761         const struct rte_memzone *rz;
2762         struct ixgbe_rx_queue *rxq;
2763         struct ixgbe_hw     *hw;
2764         uint16_t len;
2765         struct ixgbe_adapter *adapter =
2766                 (struct ixgbe_adapter *)dev->data->dev_private;
2767
2768         PMD_INIT_FUNC_TRACE();
2769         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2770
2771         /*
2772          * Validate number of receive descriptors.
2773          * It must not exceed hardware maximum, and must be multiple
2774          * of IXGBE_ALIGN.
2775          */
2776         if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
2777                         (nb_desc > IXGBE_MAX_RING_DESC) ||
2778                         (nb_desc < IXGBE_MIN_RING_DESC)) {
2779                 return -EINVAL;
2780         }
2781
2782         /* Free memory prior to re-allocation if needed... */
2783         if (dev->data->rx_queues[queue_idx] != NULL) {
2784                 ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
2785                 dev->data->rx_queues[queue_idx] = NULL;
2786         }
2787
2788         /* First allocate the rx queue data structure */
2789         rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
2790                                  RTE_CACHE_LINE_SIZE, socket_id);
2791         if (rxq == NULL)
2792                 return -ENOMEM;
2793         rxq->mb_pool = mp;
2794         rxq->nb_rx_desc = nb_desc;
2795         rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2796         rxq->queue_id = queue_idx;
2797         rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2798                 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2799         rxq->port_id = dev->data->port_id;
2800         rxq->crc_len = (uint8_t) ((dev->data->dev_conf.rxmode.hw_strip_crc) ?
2801                                                         0 : ETHER_CRC_LEN);
2802         rxq->drop_en = rx_conf->rx_drop_en;
2803         rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2804
2805         /*
2806          * The packet type in RX descriptor is different for different NICs.
2807          * Some bits are used for x550 but reserved for other NICS.
2808          * So set different masks for different NICs.
2809          */
2810         if (hw->mac.type == ixgbe_mac_X550 ||
2811             hw->mac.type == ixgbe_mac_X550EM_x ||
2812             hw->mac.type == ixgbe_mac_X550EM_a ||
2813             hw->mac.type == ixgbe_mac_X550_vf ||
2814             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2815             hw->mac.type == ixgbe_mac_X550EM_a_vf)
2816                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
2817         else
2818                 rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
2819
2820         /*
2821          * Allocate RX ring hardware descriptors. A memzone large enough to
2822          * handle the maximum ring size is allocated in order to allow for
2823          * resizing in later calls to the queue setup function.
2824          */
2825         rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2826                                       RX_RING_SZ, IXGBE_ALIGN, socket_id);
2827         if (rz == NULL) {
2828                 ixgbe_rx_queue_release(rxq);
2829                 return -ENOMEM;
2830         }
2831
2832         /*
2833          * Zero init all the descriptors in the ring.
2834          */
2835         memset(rz->addr, 0, RX_RING_SZ);
2836
2837         /*
2838          * Modified to setup VFRDT for Virtual Function
2839          */
2840         if (hw->mac.type == ixgbe_mac_82599_vf ||
2841             hw->mac.type == ixgbe_mac_X540_vf ||
2842             hw->mac.type == ixgbe_mac_X550_vf ||
2843             hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2844             hw->mac.type == ixgbe_mac_X550EM_a_vf) {
2845                 rxq->rdt_reg_addr =
2846                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
2847                 rxq->rdh_reg_addr =
2848                         IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
2849         } else {
2850                 rxq->rdt_reg_addr =
2851                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
2852                 rxq->rdh_reg_addr =
2853                         IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
2854         }
2855
2856         rxq->rx_ring_phys_addr = rte_mem_phy2mch(rz->memseg_id, rz->phys_addr);
2857         rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
2858
2859         /*
2860          * Certain constraints must be met in order to use the bulk buffer
2861          * allocation Rx burst function. If any of Rx queues doesn't meet them
2862          * the feature should be disabled for the whole port.
2863          */
2864         if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
2865                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
2866                                     "preconditions - canceling the feature for "
2867                                     "the whole port[%d]",
2868                              rxq->queue_id, rxq->port_id);
2869                 adapter->rx_bulk_alloc_allowed = false;
2870         }
2871
2872         /*
2873          * Allocate software ring. Allow for space at the end of the
2874          * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
2875          * function does not access an invalid memory region.
2876          */
2877         len = nb_desc;
2878         if (adapter->rx_bulk_alloc_allowed)
2879                 len += RTE_PMD_IXGBE_RX_MAX_BURST;
2880
2881         rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
2882                                           sizeof(struct ixgbe_rx_entry) * len,
2883                                           RTE_CACHE_LINE_SIZE, socket_id);
2884         if (!rxq->sw_ring) {
2885                 ixgbe_rx_queue_release(rxq);
2886                 return -ENOMEM;
2887         }
2888
2889         /*
2890          * Always allocate even if it's not going to be needed in order to
2891          * simplify the code.
2892          *
2893          * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
2894          * be requested in ixgbe_dev_rx_init(), which is called later from
2895          * dev_start() flow.
2896          */
2897         rxq->sw_sc_ring =
2898                 rte_zmalloc_socket("rxq->sw_sc_ring",
2899                                    sizeof(struct ixgbe_scattered_rx_entry) * len,
2900                                    RTE_CACHE_LINE_SIZE, socket_id);
2901         if (!rxq->sw_sc_ring) {
2902                 ixgbe_rx_queue_release(rxq);
2903                 return -ENOMEM;
2904         }
2905
2906         PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
2907                             "dma_addr=0x%"PRIx64,
2908                      rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
2909                      rxq->rx_ring_phys_addr);
2910
2911         if (!rte_is_power_of_2(nb_desc)) {
2912                 PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
2913                                     "preconditions - canceling the feature for "
2914                                     "the whole port[%d]",
2915                              rxq->queue_id, rxq->port_id);
2916                 adapter->rx_vec_allowed = false;
2917         } else
2918                 ixgbe_rxq_vec_setup(rxq);
2919
2920         dev->data->rx_queues[queue_idx] = rxq;
2921
2922         ixgbe_reset_rx_queue(adapter, rxq);
2923
2924         return 0;
2925 }
2926
2927 uint32_t
2928 ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2929 {
2930 #define IXGBE_RXQ_SCAN_INTERVAL 4
2931         volatile union ixgbe_adv_rx_desc *rxdp;
2932         struct ixgbe_rx_queue *rxq;
2933         uint32_t desc = 0;
2934
2935         rxq = dev->data->rx_queues[rx_queue_id];
2936         rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2937
2938         while ((desc < rxq->nb_rx_desc) &&
2939                 (rxdp->wb.upper.status_error &
2940                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
2941                 desc += IXGBE_RXQ_SCAN_INTERVAL;
2942                 rxdp += IXGBE_RXQ_SCAN_INTERVAL;
2943                 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2944                         rxdp = &(rxq->rx_ring[rxq->rx_tail +
2945                                 desc - rxq->nb_rx_desc]);
2946         }
2947
2948         return desc;
2949 }
2950
2951 int
2952 ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2953 {
2954         volatile union ixgbe_adv_rx_desc *rxdp;
2955         struct ixgbe_rx_queue *rxq = rx_queue;
2956         uint32_t desc;
2957
2958         if (unlikely(offset >= rxq->nb_rx_desc))
2959                 return 0;
2960         desc = rxq->rx_tail + offset;
2961         if (desc >= rxq->nb_rx_desc)
2962                 desc -= rxq->nb_rx_desc;
2963
2964         rxdp = &rxq->rx_ring[desc];
2965         return !!(rxdp->wb.upper.status_error &
2966                         rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
2967 }
2968
2969 int
2970 ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2971 {
2972         struct ixgbe_rx_queue *rxq = rx_queue;
2973         volatile uint32_t *status;
2974         uint32_t nb_hold, desc;
2975
2976         if (unlikely(offset >= rxq->nb_rx_desc))
2977                 return -EINVAL;
2978
2979 #ifdef RTE_IXGBE_INC_VECTOR
2980         if (rxq->rx_using_sse)
2981                 nb_hold = rxq->rxrearm_nb;
2982         else
2983 #endif
2984                 nb_hold = rxq->nb_rx_hold;
2985         if (offset >= rxq->nb_rx_desc - nb_hold)
2986                 return RTE_ETH_RX_DESC_UNAVAIL;
2987
2988         desc = rxq->rx_tail + offset;
2989         if (desc >= rxq->nb_rx_desc)
2990                 desc -= rxq->nb_rx_desc;
2991
2992         status = &rxq->rx_ring[desc].wb.upper.status_error;
2993         if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
2994                 return RTE_ETH_RX_DESC_DONE;
2995
2996         return RTE_ETH_RX_DESC_AVAIL;
2997 }
2998
2999 int
3000 ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3001 {
3002         struct ixgbe_tx_queue *txq = tx_queue;
3003         volatile uint32_t *status;
3004         uint32_t desc;
3005
3006         if (unlikely(offset >= txq->nb_tx_desc))
3007                 return -EINVAL;
3008
3009         desc = txq->tx_tail + offset;
3010         /* go to next desc that has the RS bit */
3011         desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3012                 txq->tx_rs_thresh;
3013         if (desc >= txq->nb_tx_desc) {
3014                 desc -= txq->nb_tx_desc;
3015                 if (desc >= txq->nb_tx_desc)
3016                         desc -= txq->nb_tx_desc;
3017         }
3018
3019         status = &txq->tx_ring[desc].wb.status;
3020         if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3021                 return RTE_ETH_TX_DESC_DONE;
3022
3023         return RTE_ETH_TX_DESC_FULL;
3024 }
3025
3026 void __attribute__((cold))
3027 ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3028 {
3029         unsigned i;
3030         struct ixgbe_adapter *adapter =
3031                 (struct ixgbe_adapter *)dev->data->dev_private;
3032
3033         PMD_INIT_FUNC_TRACE();
3034
3035         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3036                 struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3037
3038                 if (txq != NULL) {
3039                         txq->ops->release_mbufs(txq);
3040                         txq->ops->reset(txq);
3041                 }
3042         }
3043
3044         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3045                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3046
3047                 if (rxq != NULL) {
3048                         ixgbe_rx_queue_release_mbufs(rxq);
3049                         ixgbe_reset_rx_queue(adapter, rxq);
3050                 }
3051         }
3052 }
3053
3054 void
3055 ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3056 {
3057         unsigned i;
3058
3059         PMD_INIT_FUNC_TRACE();
3060
3061         for (i = 0; i < dev->data->nb_rx_queues; i++) {
3062                 ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3063                 dev->data->rx_queues[i] = NULL;
3064         }
3065         dev->data->nb_rx_queues = 0;
3066
3067         for (i = 0; i < dev->data->nb_tx_queues; i++) {
3068                 ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3069                 dev->data->tx_queues[i] = NULL;
3070         }
3071         dev->data->nb_tx_queues = 0;
3072 }
3073
3074 /*********************************************************************
3075  *
3076  *  Device RX/TX init functions
3077  *
3078  **********************************************************************/
3079
3080 /**
3081  * Receive Side Scaling (RSS)
3082  * See section 7.1.2.8 in the following document:
3083  *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3084  *
3085  * Principles:
3086  * The source and destination IP addresses of the IP header and the source
3087  * and destination ports of TCP/UDP headers, if any, of received packets are
3088  * hashed against a configurable random key to compute a 32-bit RSS hash result.
3089  * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3090  * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3091  * RSS output index which is used as the RX queue index where to store the
3092  * received packets.
3093  * The following output is supplied in the RX write-back descriptor:
3094  *     - 32-bit result of the Microsoft RSS hash function,
3095  *     - 4-bit RSS type field.
3096  */
3097
3098 /*
3099  * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3100  * Used as the default key.
3101  */
3102 static uint8_t rss_intel_key[40] = {
3103         0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3104         0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3105         0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3106         0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3107         0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3108 };
3109
3110 static void
3111 ixgbe_rss_disable(struct rte_eth_dev *dev)
3112 {
3113         struct ixgbe_hw *hw;
3114         uint32_t mrqc;
3115         uint32_t mrqc_reg;
3116
3117         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3118         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3119         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3120         mrqc &= ~IXGBE_MRQC_RSSEN;
3121         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3122 }
3123
3124 static void
3125 ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3126 {
3127         uint8_t  *hash_key;
3128         uint32_t mrqc;
3129         uint32_t rss_key;
3130         uint64_t rss_hf;
3131         uint16_t i;
3132         uint32_t mrqc_reg;
3133         uint32_t rssrk_reg;
3134
3135         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3136         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3137
3138         hash_key = rss_conf->rss_key;
3139         if (hash_key != NULL) {
3140                 /* Fill in RSS hash key */
3141                 for (i = 0; i < 10; i++) {
3142                         rss_key  = hash_key[(i * 4)];
3143                         rss_key |= hash_key[(i * 4) + 1] << 8;
3144                         rss_key |= hash_key[(i * 4) + 2] << 16;
3145                         rss_key |= hash_key[(i * 4) + 3] << 24;
3146                         IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3147                 }
3148         }
3149
3150         /* Set configured hashing protocols in MRQC register */
3151         rss_hf = rss_conf->rss_hf;
3152         mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3153         if (rss_hf & ETH_RSS_IPV4)
3154                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3155         if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3156                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3157         if (rss_hf & ETH_RSS_IPV6)
3158                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3159         if (rss_hf & ETH_RSS_IPV6_EX)
3160                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3161         if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3162                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3163         if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3164                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3165         if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3166                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3167         if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3168                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3169         if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3170                 mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3171         IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3172 }
3173
3174 int
3175 ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3176                           struct rte_eth_rss_conf *rss_conf)
3177 {
3178         struct ixgbe_hw *hw;
3179         uint32_t mrqc;
3180         uint64_t rss_hf;
3181         uint32_t mrqc_reg;
3182
3183         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3184
3185         if (!ixgbe_rss_update_sp(hw->mac.type)) {
3186                 PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3187                         "NIC.");
3188                 return -ENOTSUP;
3189         }
3190         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3191
3192         /*
3193          * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3194          *     "RSS enabling cannot be done dynamically while it must be
3195          *      preceded by a software reset"
3196          * Before changing anything, first check that the update RSS operation
3197          * does not attempt to disable RSS, if RSS was enabled at
3198          * initialization time, or does not attempt to enable RSS, if RSS was
3199          * disabled at initialization time.
3200          */
3201         rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3202         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3203         if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3204                 if (rss_hf != 0) /* Enable RSS */
3205                         return -(EINVAL);
3206                 return 0; /* Nothing to do */
3207         }
3208         /* RSS enabled */
3209         if (rss_hf == 0) /* Disable RSS */
3210                 return -(EINVAL);
3211         ixgbe_hw_rss_hash_set(hw, rss_conf);
3212         return 0;
3213 }
3214
3215 int
3216 ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3217                             struct rte_eth_rss_conf *rss_conf)
3218 {
3219         struct ixgbe_hw *hw;
3220         uint8_t *hash_key;
3221         uint32_t mrqc;
3222         uint32_t rss_key;
3223         uint64_t rss_hf;
3224         uint16_t i;
3225         uint32_t mrqc_reg;
3226         uint32_t rssrk_reg;
3227
3228         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3229         mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3230         rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3231         hash_key = rss_conf->rss_key;
3232         if (hash_key != NULL) {
3233                 /* Return RSS hash key */
3234                 for (i = 0; i < 10; i++) {
3235                         rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3236                         hash_key[(i * 4)] = rss_key & 0x000000FF;
3237                         hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3238                         hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3239                         hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3240                 }
3241         }
3242
3243         /* Get RSS functions configured in MRQC register */
3244         mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3245         if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3246                 rss_conf->rss_hf = 0;
3247                 return 0;
3248         }
3249         rss_hf = 0;
3250         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3251                 rss_hf |= ETH_RSS_IPV4;
3252         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3253                 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3254         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3255                 rss_hf |= ETH_RSS_IPV6;
3256         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3257                 rss_hf |= ETH_RSS_IPV6_EX;
3258         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3259                 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3260         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3261                 rss_hf |= ETH_RSS_IPV6_TCP_EX;
3262         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3263                 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3264         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3265                 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3266         if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3267                 rss_hf |= ETH_RSS_IPV6_UDP_EX;
3268         rss_conf->rss_hf = rss_hf;
3269         return 0;
3270 }
3271
3272 static void
3273 ixgbe_rss_configure(struct rte_eth_dev *dev)
3274 {
3275         struct rte_eth_rss_conf rss_conf;
3276         struct ixgbe_hw *hw;
3277         uint32_t reta;
3278         uint16_t i;
3279         uint16_t j;
3280         uint16_t sp_reta_size;
3281         uint32_t reta_reg;
3282
3283         PMD_INIT_FUNC_TRACE();
3284         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3285
3286         sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3287
3288         /*
3289          * Fill in redirection table
3290          * The byte-swap is needed because NIC registers are in
3291          * little-endian order.
3292          */
3293         reta = 0;
3294         for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3295                 reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3296
3297                 if (j == dev->data->nb_rx_queues)
3298                         j = 0;
3299                 reta = (reta << 8) | j;
3300                 if ((i & 3) == 3)
3301                         IXGBE_WRITE_REG(hw, reta_reg,
3302                                         rte_bswap32(reta));
3303         }
3304
3305         /*
3306          * Configure the RSS key and the RSS protocols used to compute
3307          * the RSS hash of input packets.
3308          */
3309         rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3310         if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3311                 ixgbe_rss_disable(dev);
3312                 return;
3313         }
3314         if (rss_conf.rss_key == NULL)
3315                 rss_conf.rss_key = rss_intel_key; /* Default hash key */
3316         ixgbe_hw_rss_hash_set(hw, &rss_conf);
3317 }
3318
3319 #define NUM_VFTA_REGISTERS 128
3320 #define NIC_RX_BUFFER_SIZE 0x200
3321 #define X550_RX_BUFFER_SIZE 0x180
3322
3323 static void
3324 ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3325 {
3326         struct rte_eth_vmdq_dcb_conf *cfg;
3327         struct ixgbe_hw *hw;
3328         enum rte_eth_nb_pools num_pools;
3329         uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3330         uint16_t pbsize;
3331         uint8_t nb_tcs; /* number of traffic classes */
3332         int i;
3333
3334         PMD_INIT_FUNC_TRACE();
3335         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3336         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3337         num_pools = cfg->nb_queue_pools;
3338         /* Check we have a valid number of pools */
3339         if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3340                 ixgbe_rss_disable(dev);
3341                 return;
3342         }
3343         /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3344         nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3345
3346         /*
3347          * RXPBSIZE
3348          * split rx buffer up into sections, each for 1 traffic class
3349          */
3350         switch (hw->mac.type) {
3351         case ixgbe_mac_X550:
3352         case ixgbe_mac_X550EM_x:
3353         case ixgbe_mac_X550EM_a:
3354                 pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3355                 break;
3356         default:
3357                 pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3358                 break;
3359         }
3360         for (i = 0; i < nb_tcs; i++) {
3361                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3362
3363                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3364                 /* clear 10 bits. */
3365                 rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3366                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3367         }
3368         /* zero alloc all unused TCs */
3369         for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3370                 uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3371
3372                 rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3373                 /* clear 10 bits. */
3374                 IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3375         }
3376
3377         /* MRQC: enable vmdq and dcb */
3378         mrqc = (num_pools == ETH_16_POOLS) ?
3379                 IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3380         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3381
3382         /* PFVTCTL: turn on virtualisation and set the default pool */
3383         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3384         if (cfg->enable_default_pool) {
3385                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3386         } else {
3387                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3388         }
3389
3390         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3391
3392         /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3393         queue_mapping = 0;
3394         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3395                 /*
3396                  * mapping is done with 3 bits per priority,
3397                  * so shift by i*3 each time
3398                  */
3399                 queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3400
3401         IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3402
3403         /* RTRPCS: DCB related */
3404         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3405
3406         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3407         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3408         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3409         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3410
3411         /* VFTA - enable all vlan filters */
3412         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3413                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3414         }
3415
3416         /* VFRE: pool enabling for receive - 16 or 32 */
3417         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3418                         num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3419
3420         /*
3421          * MPSAR - allow pools to read specific mac addresses
3422          * In this case, all pools should be able to read from mac addr 0
3423          */
3424         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3425         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3426
3427         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3428         for (i = 0; i < cfg->nb_pool_maps; i++) {
3429                 /* set vlan id in VF register and set the valid bit */
3430                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3431                                 (cfg->pool_map[i].vlan_id & 0xFFF)));
3432                 /*
3433                  * Put the allowed pools in VFB reg. As we only have 16 or 32
3434                  * pools, we only need to use the first half of the register
3435                  * i.e. bits 0-31
3436                  */
3437                 IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3438         }
3439 }
3440
3441 /**
3442  * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3443  * @dev: pointer to eth_dev structure
3444  * @dcb_config: pointer to ixgbe_dcb_config structure
3445  */
3446 static void
3447 ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3448                        struct ixgbe_dcb_config *dcb_config)
3449 {
3450         uint32_t reg;
3451         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3452
3453         PMD_INIT_FUNC_TRACE();
3454         if (hw->mac.type != ixgbe_mac_82598EB) {
3455                 /* Disable the Tx desc arbiter so that MTQC can be changed */
3456                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3457                 reg |= IXGBE_RTTDCS_ARBDIS;
3458                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3459
3460                 /* Enable DCB for Tx with 8 TCs */
3461                 if (dcb_config->num_tcs.pg_tcs == 8) {
3462                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3463                 } else {
3464                         reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3465                 }
3466                 if (dcb_config->vt_mode)
3467                         reg |= IXGBE_MTQC_VT_ENA;
3468                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3469
3470                 /* Enable the Tx desc arbiter */
3471                 reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3472                 reg &= ~IXGBE_RTTDCS_ARBDIS;
3473                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3474
3475                 /* Enable Security TX Buffer IFG for DCB */
3476                 reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3477                 reg |= IXGBE_SECTX_DCB;
3478                 IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3479         }
3480 }
3481
3482 /**
3483  * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3484  * @dev: pointer to rte_eth_dev structure
3485  * @dcb_config: pointer to ixgbe_dcb_config structure
3486  */
3487 static void
3488 ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3489                         struct ixgbe_dcb_config *dcb_config)
3490 {
3491         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3492                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3493         struct ixgbe_hw *hw =
3494                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3495
3496         PMD_INIT_FUNC_TRACE();
3497         if (hw->mac.type != ixgbe_mac_82598EB)
3498                 /*PF VF Transmit Enable*/
3499                 IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3500                         vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3501
3502         /*Configure general DCB TX parameters*/
3503         ixgbe_dcb_tx_hw_config(dev, dcb_config);
3504 }
3505
3506 static void
3507 ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3508                         struct ixgbe_dcb_config *dcb_config)
3509 {
3510         struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3511                         &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3512         struct ixgbe_dcb_tc_config *tc;
3513         uint8_t i, j;
3514
3515         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3516         if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3517                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3518                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3519         } else {
3520                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3521                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3522         }
3523         /* User Priority to Traffic Class mapping */
3524         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3525                 j = vmdq_rx_conf->dcb_tc[i];
3526                 tc = &dcb_config->tc_config[j];
3527                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3528                                                 (uint8_t)(1 << j);
3529         }
3530 }
3531
3532 static void
3533 ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3534                         struct ixgbe_dcb_config *dcb_config)
3535 {
3536         struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3537                         &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3538         struct ixgbe_dcb_tc_config *tc;
3539         uint8_t i, j;
3540
3541         /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3542         if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3543                 dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3544                 dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3545         } else {
3546                 dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3547                 dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3548         }
3549
3550         /* User Priority to Traffic Class mapping */
3551         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3552                 j = vmdq_tx_conf->dcb_tc[i];
3553                 tc = &dcb_config->tc_config[j];
3554                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3555                                                 (uint8_t)(1 << j);
3556         }
3557 }
3558
3559 static void
3560 ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3561                 struct ixgbe_dcb_config *dcb_config)
3562 {
3563         struct rte_eth_dcb_rx_conf *rx_conf =
3564                         &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3565         struct ixgbe_dcb_tc_config *tc;
3566         uint8_t i, j;
3567
3568         dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3569         dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3570
3571         /* User Priority to Traffic Class mapping */
3572         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3573                 j = rx_conf->dcb_tc[i];
3574                 tc = &dcb_config->tc_config[j];
3575                 tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap =
3576                                                 (uint8_t)(1 << j);
3577         }
3578 }
3579
3580 static void
3581 ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3582                 struct ixgbe_dcb_config *dcb_config)
3583 {
3584         struct rte_eth_dcb_tx_conf *tx_conf =
3585                         &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3586         struct ixgbe_dcb_tc_config *tc;
3587         uint8_t i, j;
3588
3589         dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3590         dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3591
3592         /* User Priority to Traffic Class mapping */
3593         for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3594                 j = tx_conf->dcb_tc[i];
3595                 tc = &dcb_config->tc_config[j];
3596                 tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap =
3597                                                 (uint8_t)(1 << j);
3598         }
3599 }
3600
3601 /**
3602  * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3603  * @dev: pointer to eth_dev structure
3604  * @dcb_config: pointer to ixgbe_dcb_config structure
3605  */
3606 static void
3607 ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3608                        struct ixgbe_dcb_config *dcb_config)
3609 {
3610         uint32_t reg;
3611         uint32_t vlanctrl;
3612         uint8_t i;
3613         uint32_t q;
3614         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3615
3616         PMD_INIT_FUNC_TRACE();
3617         /*
3618          * Disable the arbiter before changing parameters
3619          * (always enable recycle mode; WSP)
3620          */
3621         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3622         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3623
3624         if (hw->mac.type != ixgbe_mac_82598EB) {
3625                 reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3626                 if (dcb_config->num_tcs.pg_tcs == 4) {
3627                         if (dcb_config->vt_mode)
3628                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3629                                         IXGBE_MRQC_VMDQRT4TCEN;
3630                         else {
3631                                 /* no matter the mode is DCB or DCB_RSS, just
3632                                  * set the MRQE to RSSXTCEN. RSS is controlled
3633                                  * by RSS_FIELD
3634                                  */
3635                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3636                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3637                                         IXGBE_MRQC_RTRSS4TCEN;
3638                         }
3639                 }
3640                 if (dcb_config->num_tcs.pg_tcs == 8) {
3641                         if (dcb_config->vt_mode)
3642                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3643                                         IXGBE_MRQC_VMDQRT8TCEN;
3644                         else {
3645                                 IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3646                                 reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3647                                         IXGBE_MRQC_RTRSS8TCEN;
3648                         }
3649                 }
3650
3651                 IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3652
3653                 if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3654                         /* Disable drop for all queues in VMDQ mode*/
3655                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3656                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3657                                                 (IXGBE_QDE_WRITE |
3658                                                  (q << IXGBE_QDE_IDX_SHIFT)));
3659                 } else {
3660                         /* Enable drop for all queues in SRIOV mode */
3661                         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3662                                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
3663                                                 (IXGBE_QDE_WRITE |
3664                                                  (q << IXGBE_QDE_IDX_SHIFT) |
3665                                                  IXGBE_QDE_ENABLE));
3666                 }
3667         }
3668
3669         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3670         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3671         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3672         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3673
3674         /* VFTA - enable all vlan filters */
3675         for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3676                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3677         }
3678
3679         /*
3680          * Configure Rx packet plane (recycle mode; WSP) and
3681          * enable arbiter
3682          */
3683         reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
3684         IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3685 }
3686
3687 static void
3688 ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
3689                         uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3690 {
3691         switch (hw->mac.type) {
3692         case ixgbe_mac_82598EB:
3693                 ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
3694                 break;
3695         case ixgbe_mac_82599EB:
3696         case ixgbe_mac_X540:
3697         case ixgbe_mac_X550:
3698         case ixgbe_mac_X550EM_x:
3699         case ixgbe_mac_X550EM_a:
3700                 ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
3701                                                   tsa, map);
3702                 break;
3703         default:
3704                 break;
3705         }
3706 }
3707
3708 static void
3709 ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
3710                             uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
3711 {
3712         switch (hw->mac.type) {
3713         case ixgbe_mac_82598EB:
3714                 ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
3715                 ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
3716                 break;
3717         case ixgbe_mac_82599EB:
3718         case ixgbe_mac_X540:
3719         case ixgbe_mac_X550:
3720         case ixgbe_mac_X550EM_x:
3721         case ixgbe_mac_X550EM_a:
3722                 ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
3723                 ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
3724                 break;
3725         default:
3726                 break;
3727         }
3728 }
3729
3730 #define DCB_RX_CONFIG  1
3731 #define DCB_TX_CONFIG  1
3732 #define DCB_TX_PB      1024
3733 /**
3734  * ixgbe_dcb_hw_configure - Enable DCB and configure
3735  * general DCB in VT mode and non-VT mode parameters
3736  * @dev: pointer to rte_eth_dev structure
3737  * @dcb_config: pointer to ixgbe_dcb_config structure
3738  */
3739 static int
3740 ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
3741                         struct ixgbe_dcb_config *dcb_config)
3742 {
3743         int     ret = 0;
3744         uint8_t i, pfc_en, nb_tcs;
3745         uint16_t pbsize, rx_buffer_size;
3746         uint8_t config_dcb_rx = 0;
3747         uint8_t config_dcb_tx = 0;
3748         uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3749         uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3750         uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3751         uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3752         uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
3753         struct ixgbe_dcb_tc_config *tc;
3754         uint32_t max_frame = dev->data->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
3755         struct ixgbe_hw *hw =
3756                         IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3757         struct ixgbe_bw_conf *bw_conf =
3758                 IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
3759
3760         switch (dev->data->dev_conf.rxmode.mq_mode) {
3761         case ETH_MQ_RX_VMDQ_DCB:
3762                 dcb_config->vt_mode = true;
3763                 if (hw->mac.type != ixgbe_mac_82598EB) {
3764                         config_dcb_rx = DCB_RX_CONFIG;
3765                         /*
3766                          *get dcb and VT rx configuration parameters
3767                          *from rte_eth_conf
3768                          */
3769                         ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
3770                         /*Configure general VMDQ and DCB RX parameters*/
3771                         ixgbe_vmdq_dcb_configure(dev);
3772                 }
3773                 break;
3774         case ETH_MQ_RX_DCB:
3775         case ETH_MQ_RX_DCB_RSS:
3776                 dcb_config->vt_mode = false;
3777                 config_dcb_rx = DCB_RX_CONFIG;
3778                 /* Get dcb TX configuration parameters from rte_eth_conf */
3779                 ixgbe_dcb_rx_config(dev, dcb_config);
3780                 /*Configure general DCB RX parameters*/
3781                 ixgbe_dcb_rx_hw_config(dev, dcb_config);
3782                 break;
3783         default:
3784                 PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
3785                 break;
3786         }
3787         switch (dev->data->dev_conf.txmode.mq_mode) {
3788         case ETH_MQ_TX_VMDQ_DCB:
3789                 dcb_config->vt_mode = true;
3790                 config_dcb_tx = DCB_TX_CONFIG;
3791                 /* get DCB and VT TX configuration parameters
3792                  * from rte_eth_conf
3793                  */
3794                 ixgbe_dcb_vt_tx_config(dev, dcb_config);
3795                 /*Configure general VMDQ and DCB TX parameters*/
3796                 ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
3797                 break;
3798
3799         case ETH_MQ_TX_DCB:
3800                 dcb_config->vt_mode = false;
3801                 config_dcb_tx = DCB_TX_CONFIG;
3802                 /*get DCB TX configuration parameters from rte_eth_conf*/
3803                 ixgbe_dcb_tx_config(dev, dcb_config);
3804                 /*Configure general DCB TX parameters*/
3805                 ixgbe_dcb_tx_hw_config(dev, dcb_config);
3806                 break;
3807         default:
3808                 PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
3809                 break;
3810         }
3811
3812         nb_tcs = dcb_config->num_tcs.pfc_tcs;
3813         /* Unpack map */
3814         ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
3815         if (nb_tcs == ETH_4_TCS) {
3816                 /* Avoid un-configured priority mapping to TC0 */
3817                 uint8_t j = 4;
3818                 uint8_t mask = 0xFF;
3819
3820                 for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
3821                         mask = (uint8_t)(mask & (~(1 << map[i])));
3822                 for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
3823                         if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
3824                                 map[j++] = i;
3825                         mask >>= 1;
3826                 }
3827                 /* Re-configure 4 TCs BW */
3828                 for (i = 0; i < nb_tcs; i++) {
3829                         tc = &dcb_config->tc_config[i];
3830                         if (bw_conf->tc_num != nb_tcs)
3831                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3832                                         (uint8_t)(100 / nb_tcs);
3833                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3834                                                 (uint8_t)(100 / nb_tcs);
3835                 }
3836                 for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
3837                         tc = &dcb_config->tc_config[i];
3838                         tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
3839                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
3840                 }
3841         } else {
3842                 /* Re-configure 8 TCs BW */
3843                 for (i = 0; i < nb_tcs; i++) {
3844                         tc = &dcb_config->tc_config[i];
3845                         if (bw_conf->tc_num != nb_tcs)
3846                                 tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
3847                                         (uint8_t)(100 / nb_tcs + (i & 1));
3848                         tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
3849                                 (uint8_t)(100 / nb_tcs + (i & 1));
3850                 }
3851         }
3852
3853         switch (hw->mac.type) {
3854         case ixgbe_mac_X550:
3855         case ixgbe_mac_X550EM_x:
3856         case ixgbe_mac_X550EM_a:
3857                 rx_buffer_size = X550_RX_BUFFER_SIZE;
3858                 break;
3859         default:
3860                 rx_buffer_size = NIC_RX_BUFFER_SIZE;
3861                 break;
3862         }
3863
3864         if (config_dcb_rx) {
3865                 /* Set RX buffer size */
3866                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3867                 uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
3868
3869                 for (i = 0; i < nb_tcs; i++) {
3870                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3871                 }
3872                 /* zero alloc all unused TCs */
3873                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3874                         IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
3875                 }
3876         }
3877         if (config_dcb_tx) {
3878                 /* Only support an equally distributed
3879                  *  Tx packet buffer strategy.
3880                  */
3881                 uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
3882                 uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
3883
3884                 for (i = 0; i < nb_tcs; i++) {
3885                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
3886                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
3887                 }
3888                 /* Clear unused TCs, if any, to zero buffer size*/
3889                 for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3890                         IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
3891                         IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
3892                 }
3893         }
3894
3895         /*Calculates traffic class credits*/
3896         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3897                                 IXGBE_DCB_TX_CONFIG);
3898         ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
3899                                 IXGBE_DCB_RX_CONFIG);
3900
3901         if (config_dcb_rx) {
3902                 /* Unpack CEE standard containers */
3903                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
3904                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3905                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
3906                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
3907                 /* Configure PG(ETS) RX */
3908                 ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
3909         }
3910
3911         if (config_dcb_tx) {
3912                 /* Unpack CEE standard containers */
3913                 ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
3914                 ixgbe_dcb_unpack_max_cee(dcb_config, max);
3915                 ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
3916                 ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
3917                 /* Configure PG(ETS) TX */
3918                 ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
3919         }
3920
3921         /*Configure queue statistics registers*/
3922         ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
3923
3924         /* Check if the PFC is supported */
3925         if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
3926                 pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
3927                 for (i = 0; i < nb_tcs; i++) {
3928                         /*
3929                         * If the TC count is 8,and the default high_water is 48,
3930                         * the low_water is 16 as default.
3931                         */
3932                         hw->fc.high_water[i] = (pbsize * 3) / 4;
3933                         hw->fc.low_water[i] = pbsize / 4;
3934                         /* Enable pfc for this TC */
3935                         tc = &dcb_config->tc_config[i];
3936                         tc->pfc = ixgbe_dcb_pfc_enabled;
3937                 }
3938                 ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
3939                 if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
3940                         pfc_en &= 0x0F;
3941                 ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
3942         }
3943
3944         return ret;
3945 }
3946
3947 /**
3948  * ixgbe_configure_dcb - Configure DCB  Hardware
3949  * @dev: pointer to rte_eth_dev
3950  */
3951 void ixgbe_configure_dcb(struct rte_eth_dev *dev)
3952 {
3953         struct ixgbe_dcb_config *dcb_cfg =
3954                         IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
3955         struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
3956
3957         PMD_INIT_FUNC_TRACE();
3958
3959         /* check support mq_mode for DCB */
3960         if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
3961             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
3962             (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
3963                 return;
3964
3965         if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
3966                 return;
3967
3968         /** Configure DCB hardware **/
3969         ixgbe_dcb_hw_configure(dev, dcb_cfg);
3970 }
3971
3972 /*
3973  * VMDq only support for 10 GbE NIC.
3974  */
3975 static void
3976 ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
3977 {
3978         struct rte_eth_vmdq_rx_conf *cfg;
3979         struct ixgbe_hw *hw;
3980         enum rte_eth_nb_pools num_pools;
3981         uint32_t mrqc, vt_ctl, vlanctrl;
3982         uint32_t vmolr = 0;
3983         int i;
3984
3985         PMD_INIT_FUNC_TRACE();
3986         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3987         cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
3988         num_pools = cfg->nb_queue_pools;
3989
3990         ixgbe_rss_disable(dev);
3991
3992         /* MRQC: enable vmdq */
3993         mrqc = IXGBE_MRQC_VMDQEN;
3994         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3995
3996         /* PFVTCTL: turn on virtualisation and set the default pool */
3997         vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3998         if (cfg->enable_default_pool)
3999                 vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4000         else
4001                 vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4002
4003         IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4004
4005         for (i = 0; i < (int)num_pools; i++) {
4006                 vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4007                 IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4008         }
4009
4010         /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4011         vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4012         vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4013         IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4014
4015         /* VFTA - enable all vlan filters */
4016         for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4017                 IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4018
4019         /* VFRE: pool enabling for receive - 64 */
4020         IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4021         if (num_pools == ETH_64_POOLS)
4022                 IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4023
4024         /*
4025          * MPSAR - allow pools to read specific mac addresses
4026          * In this case, all pools should be able to read from mac addr 0
4027          */
4028         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4029         IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4030
4031         /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4032         for (i = 0; i < cfg->nb_pool_maps; i++) {
4033                 /* set vlan id in VF register and set the valid bit */
4034                 IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4035                                 (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4036                 /*
4037                  * Put the allowed pools in VFB reg. As we only have 16 or 64
4038                  * pools, we only need to use the first half of the register
4039                  * i.e. bits 0-31
4040                  */
4041                 if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4042                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4043                                         (cfg->pool_map[i].pools & UINT32_MAX));
4044                 else
4045                         IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4046                                         ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4047
4048         }
4049
4050         /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4051         if (cfg->enable_loop_back) {
4052                 IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4053                 for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4054                         IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4055         }
4056
4057         IXGBE_WRITE_FLUSH(hw);
4058 }
4059
4060 /*
4061  * ixgbe_dcb_config_tx_hw_config - Configure general VMDq TX parameters
4062  * @hw: pointer to hardware structure
4063  */
4064 static void
4065 ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4066 {
4067         uint32_t reg;
4068         uint32_t q;
4069
4070         PMD_INIT_FUNC_TRACE();
4071         /*PF VF Transmit Enable*/
4072         IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4073         IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4074
4075         /* Disable the Tx desc arbiter so that MTQC can be changed */
4076         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4077         reg |= IXGBE_RTTDCS_ARBDIS;
4078         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4079
4080         reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4081         IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4082
4083         /* Disable drop for all queues */
4084         for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4085                 IXGBE_WRITE_REG(hw, IXGBE_QDE,
4086                   (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4087
4088         /* Enable the Tx desc arbiter */
4089         reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4090         reg &= ~IXGBE_RTTDCS_ARBDIS;
4091         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4092
4093         IXGBE_WRITE_FLUSH(hw);
4094 }
4095
4096 static int __attribute__((cold))
4097 ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4098 {
4099         struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4100         uint64_t dma_addr;
4101         unsigned int i;
4102
4103         /* Initialize software ring entries */
4104         for (i = 0; i < rxq->nb_rx_desc; i++) {
4105                 volatile union ixgbe_adv_rx_desc *rxd;
4106                 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4107
4108                 if (mbuf == NULL) {
4109                         PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4110                                      (unsigned) rxq->queue_id);
4111                         return -ENOMEM;
4112                 }
4113
4114                 rte_mbuf_refcnt_set(mbuf, 1);
4115                 mbuf->next = NULL;
4116                 mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4117                 mbuf->nb_segs = 1;
4118                 mbuf->port = rxq->port_id;
4119
4120                 dma_addr =
4121                         rte_cpu_to_le_64(rte_mbuf_data_dma_addr_default(mbuf));
4122                 rxd = &rxq->rx_ring[i];
4123                 rxd->read.hdr_addr = 0;
4124                 rxd->read.pkt_addr = dma_addr;
4125                 rxe[i].mbuf = mbuf;
4126         }
4127
4128         return 0;
4129 }
4130
4131 static int
4132 ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4133 {
4134         struct ixgbe_hw *hw;
4135         uint32_t mrqc;
4136
4137         ixgbe_rss_configure(dev);
4138
4139         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4140
4141         /* MRQC: enable VF RSS */
4142         mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4143         mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4144         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4145         case ETH_64_POOLS:
4146                 mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4147                 break;
4148
4149         case ETH_32_POOLS:
4150                 mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4151                 break;
4152
4153         default:
4154                 PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4155                 return -EINVAL;
4156         }
4157
4158         IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4159
4160         return 0;
4161 }
4162
4163 static int
4164 ixgbe_config_vf_default(struct rte_eth_dev *dev)
4165 {
4166         struct ixgbe_hw *hw =
4167                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4168
4169         switch (RTE_ETH_DEV_SRIOV(dev).active) {
4170         case ETH_64_POOLS:
4171                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4172                         IXGBE_MRQC_VMDQEN);
4173                 break;
4174
4175         case ETH_32_POOLS:
4176                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4177                         IXGBE_MRQC_VMDQRT4TCEN);
4178                 break;
4179
4180         case ETH_16_POOLS:
4181                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4182                         IXGBE_MRQC_VMDQRT8TCEN);
4183                 break;
4184         default:
4185                 PMD_INIT_LOG(ERR,
4186                         "invalid pool number in IOV mode");
4187                 break;
4188         }
4189         return 0;
4190 }
4191
4192 static int
4193 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4194 {
4195         struct ixgbe_hw *hw =
4196                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4197
4198         if (hw->mac.type == ixgbe_mac_82598EB)
4199                 return 0;
4200
4201         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4202                 /*
4203                  * SRIOV inactive scheme
4204                  * any DCB/RSS w/o VMDq multi-queue setting
4205                  */
4206                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4207                 case ETH_MQ_RX_RSS:
4208                 case ETH_MQ_RX_DCB_RSS:
4209                 case ETH_MQ_RX_VMDQ_RSS:
4210                         ixgbe_rss_configure(dev);
4211                         break;
4212
4213                 case ETH_MQ_RX_VMDQ_DCB:
4214                         ixgbe_vmdq_dcb_configure(dev);
4215                         break;
4216
4217                 case ETH_MQ_RX_VMDQ_ONLY:
4218                         ixgbe_vmdq_rx_hw_configure(dev);
4219                         break;
4220
4221                 case ETH_MQ_RX_NONE:
4222                 default:
4223                         /* if mq_mode is none, disable rss mode.*/
4224                         ixgbe_rss_disable(dev);
4225                         break;
4226                 }
4227         } else {
4228                 /* SRIOV active scheme
4229                  * Support RSS together with SRIOV.
4230                  */
4231                 switch (dev->data->dev_conf.rxmode.mq_mode) {
4232                 case ETH_MQ_RX_RSS:
4233                 case ETH_MQ_RX_VMDQ_RSS:
4234                         ixgbe_config_vf_rss(dev);
4235                         break;
4236                 case ETH_MQ_RX_VMDQ_DCB:
4237                 case ETH_MQ_RX_DCB:
4238                 /* In SRIOV, the configuration is the same as VMDq case */
4239                         ixgbe_vmdq_dcb_configure(dev);
4240                         break;
4241                 /* DCB/RSS together with SRIOV is not supported */
4242                 case ETH_MQ_RX_VMDQ_DCB_RSS:
4243                 case ETH_MQ_RX_DCB_RSS:
4244                         PMD_INIT_LOG(ERR,
4245                                 "Could not support DCB/RSS with VMDq & SRIOV");
4246                         return -1;
4247                 default:
4248                         ixgbe_config_vf_default(dev);
4249                         break;
4250                 }
4251         }
4252
4253         return 0;
4254 }
4255
4256 static int
4257 ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4258 {
4259         struct ixgbe_hw *hw =
4260                 IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4261         uint32_t mtqc;
4262         uint32_t rttdcs;
4263
4264         if (hw->mac.type == ixgbe_mac_82598EB)
4265                 return 0;
4266
4267         /* disable arbiter before setting MTQC */
4268         rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4269         rttdcs |= IXGBE_RTTDCS_ARBDIS;
4270         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4271
4272         if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4273                 /*
4274                  * SRIOV inactive scheme
4275                  * any DCB w/o VMDq multi-queue setting
4276                  */
4277                 if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4278                         ixgbe_vmdq_tx_hw_configure(hw);
4279                 else {
4280                         mtqc = IXGBE_MTQC_64Q_1PB;
4281                         IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4282                 }
4283         } else {
4284                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
4285
4286                 /*
4287                  * SRIOV active scheme
4288                  * FIXME if support DCB together with VMDq & SRIOV
4289                  */
4290                 case ETH_64_POOLS:
4291                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4292                         break;
4293                 case ETH_32_POOLS:
4294                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4295                         break;
4296                 case ETH_16_POOLS:
4297                         mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4298                                 IXGBE_MTQC_8TC_8TQ;
4299                         break;
4300                 default:
4301                         mtqc = IXGBE_MTQC_64Q_1PB;
4302                         PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4303                 }
4304                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4305         }
4306
4307         /* re-enable arbiter */
4308         rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4309         IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4310
4311         return 0;
4312 }
4313
4314 /**
4315  * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4316  *
4317  * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4318  * spec rev. 3.0 chapter 8.2.3.8.13.
4319  *
4320  * @pool Memory pool of the Rx queue
4321  */
4322 static inline uint32_t
4323 ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4324 {
4325         struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4326
4327         /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4328         uint16_t maxdesc =
4329                 IPV4_MAX_PKT_LEN /
4330                         (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4331
4332         if (maxdesc >= 16)
4333                 return IXGBE_RSCCTL_MAXDESC_16;
4334         else if (maxdesc >= 8)
4335                 return IXGBE_RSCCTL_MAXDESC_8;
4336         else if (maxdesc >= 4)
4337                 return IXGBE_RSCCTL_MAXDESC_4;
4338         else
4339                 return IXGBE_RSCCTL_MAXDESC_1;
4340 }
4341
4342 /**
4343  * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4344  * interrupt
4345  *
4346  * (Taken from FreeBSD tree)
4347  * (yes this is all very magic and confusing :)
4348  *
4349  * @dev port handle
4350  * @entry the register array entry
4351  * @vector the MSIX vector for this queue
4352  * @type RX/TX/MISC
4353  */
4354 static void
4355 ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4356 {
4357         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4358         u32 ivar, index;
4359
4360         vector |= IXGBE_IVAR_ALLOC_VAL;
4361
4362         switch (hw->mac.type) {
4363
4364         case ixgbe_mac_82598EB:
4365                 if (type == -1)
4366                         entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4367                 else
4368                         entry += (type * 64);
4369                 index = (entry >> 2) & 0x1F;
4370                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4371                 ivar &= ~(0xFF << (8 * (entry & 0x3)));
4372                 ivar |= (vector << (8 * (entry & 0x3)));
4373                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4374                 break;
4375
4376         case ixgbe_mac_82599EB:
4377         case ixgbe_mac_X540:
4378                 if (type == -1) { /* MISC IVAR */
4379                         index = (entry & 1) * 8;
4380                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4381                         ivar &= ~(0xFF << index);
4382                         ivar |= (vector << index);
4383                         IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4384                 } else {        /* RX/TX IVARS */
4385                         index = (16 * (entry & 1)) + (8 * type);
4386                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4387                         ivar &= ~(0xFF << index);
4388                         ivar |= (vector << index);
4389                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4390                 }
4391
4392                 break;
4393
4394         default:
4395                 break;
4396         }
4397 }
4398
4399 void __attribute__((cold))
4400 ixgbe_set_rx_function(struct rte_eth_dev *dev)
4401 {
4402         uint16_t i, rx_using_sse;
4403         struct ixgbe_adapter *adapter =
4404                 (struct ixgbe_adapter *)dev->data->dev_private;
4405
4406         /*
4407          * In order to allow Vector Rx there are a few configuration
4408          * conditions to be met and Rx Bulk Allocation should be allowed.
4409          */
4410         if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4411             !adapter->rx_bulk_alloc_allowed) {
4412                 PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4413                                     "preconditions or RTE_IXGBE_INC_VECTOR is "
4414                                     "not enabled",
4415                              dev->data->port_id);
4416
4417                 adapter->rx_vec_allowed = false;
4418         }
4419
4420         /*
4421          * Initialize the appropriate LRO callback.
4422          *
4423          * If all queues satisfy the bulk allocation preconditions
4424          * (hw->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4425          * Otherwise use a single allocation version.
4426          */
4427         if (dev->data->lro) {
4428                 if (adapter->rx_bulk_alloc_allowed) {
4429                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4430                                            "allocation version");
4431                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4432                 } else {
4433                         PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4434                                            "allocation version");
4435                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4436                 }
4437         } else if (dev->data->scattered_rx) {
4438                 /*
4439                  * Set the non-LRO scattered callback: there are Vector and
4440                  * single allocation versions.
4441                  */
4442                 if (adapter->rx_vec_allowed) {
4443                         PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4444                                             "callback (port=%d).",
4445                                      dev->data->port_id);
4446
4447                         dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4448                 } else if (adapter->rx_bulk_alloc_allowed) {
4449                         PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4450                                            "allocation callback (port=%d).",
4451                                      dev->data->port_id);
4452                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4453                 } else {
4454                         PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4455                                             "single allocation) "
4456                                             "Scattered Rx callback "
4457                                             "(port=%d).",
4458                                      dev->data->port_id);
4459
4460                         dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4461                 }
4462         /*
4463          * Below we set "simple" callbacks according to port/queues parameters.
4464          * If parameters allow we are going to choose between the following
4465          * callbacks:
4466          *    - Vector
4467          *    - Bulk Allocation
4468          *    - Single buffer allocation (the simplest one)
4469          */
4470         } else if (adapter->rx_vec_allowed) {
4471                 PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4472                                     "burst size no less than %d (port=%d).",
4473                              RTE_IXGBE_DESCS_PER_LOOP,
4474                              dev->data->port_id);
4475
4476                 dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4477         } else if (adapter->rx_bulk_alloc_allowed) {
4478                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4479                                     "satisfied. Rx Burst Bulk Alloc function "
4480                                     "will be used on port=%d.",
4481                              dev->data->port_id);
4482
4483                 dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4484         } else {
4485                 PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4486                                     "satisfied, or Scattered Rx is requested "
4487                                     "(port=%d).",
4488                              dev->data->port_id);
4489
4490                 dev->rx_pkt_burst = ixgbe_recv_pkts;
4491         }
4492
4493         /* Propagate information about RX function choice through all queues. */
4494
4495         rx_using_sse =
4496                 (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4497                 dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4498
4499         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4500                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4501
4502                 rxq->rx_using_sse = rx_using_sse;
4503         }
4504 }
4505
4506 /**
4507  * ixgbe_set_rsc - configure RSC related port HW registers
4508  *
4509  * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4510  * of 82599 Spec (x540 configuration is virtually the same).
4511  *
4512  * @dev port handle
4513  *
4514  * Returns 0 in case of success or a non-zero error code
4515  */
4516 static int
4517 ixgbe_set_rsc(struct rte_eth_dev *dev)
4518 {
4519         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4520         struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4521         struct rte_eth_dev_info dev_info = { 0 };
4522         bool rsc_capable = false;
4523         uint16_t i;
4524         uint32_t rdrxctl;
4525
4526         /* Sanity check */
4527         dev->dev_ops->dev_infos_get(dev, &dev_info);
4528         if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4529                 rsc_capable = true;
4530
4531         if (!rsc_capable && rx_conf->enable_lro) {
4532                 PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4533                                    "support it");
4534                 return -EINVAL;
4535         }
4536
4537         /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4538
4539         if (!rx_conf->hw_strip_crc && rx_conf->enable_lro) {
4540                 /*
4541                  * According to chapter of 4.6.7.2.1 of the Spec Rev.
4542                  * 3.0 RSC configuration requires HW CRC stripping being
4543                  * enabled. If user requested both HW CRC stripping off
4544                  * and RSC on - return an error.
4545                  */
4546                 PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4547                                     "is disabled");
4548                 return -EINVAL;
4549         }
4550
4551         /* RFCTL configuration  */
4552         if (rsc_capable) {
4553                 uint32_t rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4554
4555                 if (rx_conf->enable_lro)
4556                         /*
4557                          * Since NFS packets coalescing is not supported - clear
4558                          * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4559                          * enabled.
4560                          */
4561                         rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4562                                    IXGBE_RFCTL_NFSR_DIS);
4563                 else
4564                         rfctl |= IXGBE_RFCTL_RSC_DIS;
4565
4566                 IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4567         }
4568
4569         /* If LRO hasn't been requested - we are done here. */
4570         if (!rx_conf->enable_lro)
4571                 return 0;
4572
4573         /* Set RDRXCTL.RSCACKC bit */
4574         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4575         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4576         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4577
4578         /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4579         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4580                 struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4581                 uint32_t srrctl =
4582                         IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4583                 uint32_t rscctl =
4584                         IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4585                 uint32_t psrtype =
4586                         IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4587                 uint32_t eitr =
4588                         IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4589
4590                 /*
4591                  * ixgbe PMD doesn't support header-split at the moment.
4592                  *
4593                  * Following the 4.6.7.2.1 chapter of the 82599/x540
4594                  * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4595                  * should be configured even if header split is not
4596                  * enabled. We will configure it 128 bytes following the
4597                  * recommendation in the spec.
4598                  */
4599                 srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4600                 srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4601                                             IXGBE_SRRCTL_BSIZEHDR_MASK;
4602
4603                 /*
4604                  * TODO: Consider setting the Receive Descriptor Minimum
4605                  * Threshold Size for an RSC case. This is not an obviously
4606                  * beneficiary option but the one worth considering...
4607                  */
4608
4609                 rscctl |= IXGBE_RSCCTL_RSCEN;
4610                 rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4611                 psrtype |= IXGBE_PSRTYPE_TCPHDR;
4612
4613                 /*
4614                  * RSC: Set ITR interval corresponding to 2K ints/s.
4615                  *
4616                  * Full-sized RSC aggregations for a 10Gb/s link will
4617                  * arrive at about 20K aggregation/s rate.
4618                  *
4619                  * 2K inst/s rate will make only 10% of the
4620                  * aggregations to be closed due to the interrupt timer
4621                  * expiration for a streaming at wire-speed case.
4622                  *
4623                  * For a sparse streaming case this setting will yield
4624                  * at most 500us latency for a single RSC aggregation.
4625                  */
4626                 eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4627                 eitr |= IXGBE_EITR_INTERVAL_US(500) | IXGBE_EITR_CNT_WDIS;
4628
4629                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4630                 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4631                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4632                 IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4633
4634                 /*
4635                  * RSC requires the mapping of the queue to the
4636                  * interrupt vector.
4637                  */
4638                 ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4639         }
4640
4641         dev->data->lro = 1;
4642
4643         PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4644
4645         return 0;
4646 }
4647
4648 /*
4649  * Initializes Receive Unit.
4650  */
4651 int __attribute__((cold))
4652 ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4653 {
4654         struct ixgbe_hw     *hw;
4655         struct ixgbe_rx_queue *rxq;
4656         uint64_t bus_addr;
4657         uint32_t rxctrl;
4658         uint32_t fctrl;
4659         uint32_t hlreg0;
4660         uint32_t maxfrs;
4661         uint32_t srrctl;
4662         uint32_t rdrxctl;
4663         uint32_t rxcsum;
4664         uint16_t buf_size;
4665         uint16_t i;
4666         struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4667         int rc;
4668
4669         PMD_INIT_FUNC_TRACE();
4670         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4671
4672         /*
4673          * Make sure receives are disabled while setting
4674          * up the RX context (registers, descriptor rings, etc.).
4675          */
4676         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4677         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
4678
4679         /* Enable receipt of broadcasted frames */
4680         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4681         fctrl |= IXGBE_FCTRL_BAM;
4682         fctrl |= IXGBE_FCTRL_DPF;
4683         fctrl |= IXGBE_FCTRL_PMCF;
4684         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4685
4686         /*
4687          * Configure CRC stripping, if any.
4688          */
4689         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4690         if (rx_conf->hw_strip_crc)
4691                 hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
4692         else
4693                 hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
4694
4695         /*
4696          * Configure jumbo frame support, if any.
4697          */
4698         if (rx_conf->jumbo_frame == 1) {
4699                 hlreg0 |= IXGBE_HLREG0_JUMBOEN;
4700                 maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
4701                 maxfrs &= 0x0000FFFF;
4702                 maxfrs |= (rx_conf->max_rx_pkt_len << 16);
4703                 IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
4704         } else
4705                 hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
4706
4707         /*
4708          * If loopback mode is configured for 82599, set LPBK bit.
4709          */
4710         if (hw->mac.type == ixgbe_mac_82599EB &&
4711                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4712                 hlreg0 |= IXGBE_HLREG0_LPBK;
4713         else
4714                 hlreg0 &= ~IXGBE_HLREG0_LPBK;
4715
4716         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4717
4718         /* Setup RX queues */
4719         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4720                 rxq = dev->data->rx_queues[i];
4721
4722                 /*
4723                  * Reset crc_len in case it was changed after queue setup by a
4724                  * call to configure.
4725                  */
4726                 rxq->crc_len = rx_conf->hw_strip_crc ? 0 : ETHER_CRC_LEN;
4727
4728                 /* Setup the Base and Length of the Rx Descriptor Rings */
4729                 bus_addr = rxq->rx_ring_phys_addr;
4730                 IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
4731                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4732                 IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
4733                                 (uint32_t)(bus_addr >> 32));
4734                 IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
4735                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4736                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
4737                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
4738
4739                 /* Configure the SRRCTL register */
4740 #ifdef RTE_HEADER_SPLIT_ENABLE
4741                 /*
4742                  * Configure Header Split
4743                  */
4744                 if (rx_conf->header_split) {
4745                         if (hw->mac.type == ixgbe_mac_82599EB) {
4746                                 /* Must setup the PSRTYPE register */
4747                                 uint32_t psrtype;
4748
4749                                 psrtype = IXGBE_PSRTYPE_TCPHDR |
4750                                         IXGBE_PSRTYPE_UDPHDR   |
4751                                         IXGBE_PSRTYPE_IPV4HDR  |
4752                                         IXGBE_PSRTYPE_IPV6HDR;
4753                                 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4754                         }
4755                         srrctl = ((rx_conf->split_hdr_size <<
4756                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4757                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
4758                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4759                 } else
4760 #endif
4761                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4762
4763                 /* Set if packets are dropped when no descriptors available */
4764                 if (rxq->drop_en)
4765                         srrctl |= IXGBE_SRRCTL_DROP_EN;
4766
4767                 /*
4768                  * Configure the RX buffer size in the BSIZEPACKET field of
4769                  * the SRRCTL register of the queue.
4770                  * The value is in 1 KB resolution. Valid values can be from
4771                  * 1 KB to 16 KB.
4772                  */
4773                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
4774                         RTE_PKTMBUF_HEADROOM);
4775                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
4776                            IXGBE_SRRCTL_BSIZEPKT_MASK);
4777
4778                 IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4779
4780                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
4781                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
4782
4783                 /* It adds dual VLAN length for supporting dual VLAN */
4784                 if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
4785                                             2 * IXGBE_VLAN_TAG_SIZE > buf_size)
4786                         dev->data->scattered_rx = 1;
4787         }
4788
4789         if (rx_conf->enable_scatter)
4790                 dev->data->scattered_rx = 1;
4791
4792         /*
4793          * Device configured with multiple RX queues.
4794          */
4795         ixgbe_dev_mq_rx_configure(dev);
4796
4797         /*
4798          * Setup the Checksum Register.
4799          * Disable Full-Packet Checksum which is mutually exclusive with RSS.
4800          * Enable IP/L4 checkum computation by hardware if requested to do so.
4801          */
4802         rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4803         rxcsum |= IXGBE_RXCSUM_PCSD;
4804         if (rx_conf->hw_ip_checksum)
4805                 rxcsum |= IXGBE_RXCSUM_IPPCSE;
4806         else
4807                 rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
4808
4809         IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4810
4811         if (hw->mac.type == ixgbe_mac_82599EB ||
4812             hw->mac.type == ixgbe_mac_X540) {
4813                 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4814                 if (rx_conf->hw_strip_crc)
4815                         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
4816                 else
4817                         rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
4818                 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
4819                 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4820         }
4821
4822         rc = ixgbe_set_rsc(dev);
4823         if (rc)
4824                 return rc;
4825
4826         ixgbe_set_rx_function(dev);
4827
4828         return 0;
4829 }
4830
4831 /*
4832  * Initializes Transmit Unit.
4833  */
4834 void __attribute__((cold))
4835 ixgbe_dev_tx_init(struct rte_eth_dev *dev)
4836 {
4837         struct ixgbe_hw     *hw;
4838         struct ixgbe_tx_queue *txq;
4839         uint64_t bus_addr;
4840         uint32_t hlreg0;
4841         uint32_t txctrl;
4842         uint16_t i;
4843
4844         PMD_INIT_FUNC_TRACE();
4845         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4846
4847         /* Enable TX CRC (checksum offload requirement) and hw padding
4848          * (TSO requirement)
4849          */
4850         hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4851         hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
4852         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
4853
4854         /* Setup the Base and Length of the Tx Descriptor Rings */
4855         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4856                 txq = dev->data->tx_queues[i];
4857
4858                 bus_addr = txq->tx_ring_phys_addr;
4859                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
4860                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
4861                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
4862                                 (uint32_t)(bus_addr >> 32));
4863                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
4864                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
4865                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
4866                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
4867                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
4868
4869                 /*
4870                  * Disable Tx Head Writeback RO bit, since this hoses
4871                  * bookkeeping if things aren't delivered in order.
4872                  */
4873                 switch (hw->mac.type) {
4874                 case ixgbe_mac_82598EB:
4875                         txctrl = IXGBE_READ_REG(hw,
4876                                                 IXGBE_DCA_TXCTRL(txq->reg_idx));
4877                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4878                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
4879                                         txctrl);
4880                         break;
4881
4882                 case ixgbe_mac_82599EB:
4883                 case ixgbe_mac_X540:
4884                 case ixgbe_mac_X550:
4885                 case ixgbe_mac_X550EM_x:
4886                 case ixgbe_mac_X550EM_a:
4887                 default:
4888                         txctrl = IXGBE_READ_REG(hw,
4889                                                 IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
4890                         txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
4891                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
4892                                         txctrl);
4893                         break;
4894                 }
4895         }
4896
4897         /* Device configured with multiple TX queues. */
4898         ixgbe_dev_mq_tx_configure(dev);
4899 }
4900
4901 /*
4902  * Set up link for 82599 loopback mode Tx->Rx.
4903  */
4904 static inline void __attribute__((cold))
4905 ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
4906 {
4907         PMD_INIT_FUNC_TRACE();
4908
4909         if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
4910                 if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
4911                                 IXGBE_SUCCESS) {
4912                         PMD_INIT_LOG(ERR, "Could not enable loopback mode");
4913                         /* ignore error */
4914                         return;
4915                 }
4916         }
4917
4918         /* Restart link */
4919         IXGBE_WRITE_REG(hw,
4920                         IXGBE_AUTOC,
4921                         IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
4922         ixgbe_reset_pipeline_82599(hw);
4923
4924         hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
4925         msec_delay(50);
4926 }
4927
4928
4929 /*
4930  * Start Transmit and Receive Units.
4931  */
4932 int __attribute__((cold))
4933 ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
4934 {
4935         struct ixgbe_hw     *hw;
4936         struct ixgbe_tx_queue *txq;
4937         struct ixgbe_rx_queue *rxq;
4938         uint32_t txdctl;
4939         uint32_t dmatxctl;
4940         uint32_t rxctrl;
4941         uint16_t i;
4942         int ret = 0;
4943
4944         PMD_INIT_FUNC_TRACE();
4945         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4946
4947         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4948                 txq = dev->data->tx_queues[i];
4949                 /* Setup Transmit Threshold Registers */
4950                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
4951                 txdctl |= txq->pthresh & 0x7F;
4952                 txdctl |= ((txq->hthresh & 0x7F) << 8);
4953                 txdctl |= ((txq->wthresh & 0x7F) << 16);
4954                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
4955         }
4956
4957         if (hw->mac.type != ixgbe_mac_82598EB) {
4958                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
4959                 dmatxctl |= IXGBE_DMATXCTL_TE;
4960                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
4961         }
4962
4963         for (i = 0; i < dev->data->nb_tx_queues; i++) {
4964                 txq = dev->data->tx_queues[i];
4965                 if (!txq->tx_deferred_start) {
4966                         ret = ixgbe_dev_tx_queue_start(dev, i);
4967                         if (ret < 0)
4968                                 return ret;
4969                 }
4970         }
4971
4972         for (i = 0; i < dev->data->nb_rx_queues; i++) {
4973                 rxq = dev->data->rx_queues[i];
4974                 if (!rxq->rx_deferred_start) {
4975                         ret = ixgbe_dev_rx_queue_start(dev, i);
4976                         if (ret < 0)
4977                                 return ret;
4978                 }
4979         }
4980
4981         /* Enable Receive engine */
4982         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4983         if (hw->mac.type == ixgbe_mac_82598EB)
4984                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
4985         rxctrl |= IXGBE_RXCTRL_RXEN;
4986         hw->mac.ops.enable_rx_dma(hw, rxctrl);
4987
4988         /* If loopback mode is enabled for 82599, set up the link accordingly */
4989         if (hw->mac.type == ixgbe_mac_82599EB &&
4990                         dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_82599_TX_RX)
4991                 ixgbe_setup_loopback_link_82599(hw);
4992
4993         return 0;
4994 }
4995
4996 /*
4997  * Start Receive Units for specified queue.
4998  */
4999 int __attribute__((cold))
5000 ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5001 {
5002         struct ixgbe_hw     *hw;
5003         struct ixgbe_rx_queue *rxq;
5004         uint32_t rxdctl;
5005         int poll_ms;
5006
5007         PMD_INIT_FUNC_TRACE();
5008         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5009
5010         if (rx_queue_id < dev->data->nb_rx_queues) {
5011                 rxq = dev->data->rx_queues[rx_queue_id];
5012
5013                 /* Allocate buffers for descriptor rings */
5014                 if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5015                         PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5016                                      rx_queue_id);
5017                         return -1;
5018                 }
5019                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5020                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5021                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5022
5023                 /* Wait until RX Enable ready */
5024                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5025                 do {
5026                         rte_delay_ms(1);
5027                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5028                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5029                 if (!poll_ms)
5030                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d",
5031                                      rx_queue_id);
5032                 rte_wmb();
5033                 IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5034                 IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5035                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5036         } else
5037                 return -1;
5038
5039         return 0;
5040 }
5041
5042 /*
5043  * Stop Receive Units for specified queue.
5044  */
5045 int __attribute__((cold))
5046 ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5047 {
5048         struct ixgbe_hw     *hw;
5049         struct ixgbe_adapter *adapter =
5050                 (struct ixgbe_adapter *)dev->data->dev_private;
5051         struct ixgbe_rx_queue *rxq;
5052         uint32_t rxdctl;
5053         int poll_ms;
5054
5055         PMD_INIT_FUNC_TRACE();
5056         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5057
5058         if (rx_queue_id < dev->data->nb_rx_queues) {
5059                 rxq = dev->data->rx_queues[rx_queue_id];
5060
5061                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5062                 rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5063                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5064
5065                 /* Wait until RX Enable bit clear */
5066                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5067                 do {
5068                         rte_delay_ms(1);
5069                         rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5070                 } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5071                 if (!poll_ms)
5072                         PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d",
5073                                      rx_queue_id);
5074
5075                 rte_delay_us(RTE_IXGBE_WAIT_100_US);
5076
5077                 ixgbe_rx_queue_release_mbufs(rxq);
5078                 ixgbe_reset_rx_queue(adapter, rxq);
5079                 dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5080         } else
5081                 return -1;
5082
5083         return 0;
5084 }
5085
5086
5087 /*
5088  * Start Transmit Units for specified queue.
5089  */
5090 int __attribute__((cold))
5091 ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5092 {
5093         struct ixgbe_hw     *hw;
5094         struct ixgbe_tx_queue *txq;
5095         uint32_t txdctl;
5096         int poll_ms;
5097
5098         PMD_INIT_FUNC_TRACE();
5099         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5100
5101         if (tx_queue_id < dev->data->nb_tx_queues) {
5102                 txq = dev->data->tx_queues[tx_queue_id];
5103                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5104                 txdctl |= IXGBE_TXDCTL_ENABLE;
5105                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5106
5107                 /* Wait until TX Enable ready */
5108                 if (hw->mac.type == ixgbe_mac_82599EB) {
5109                         poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5110                         do {
5111                                 rte_delay_ms(1);
5112                                 txdctl = IXGBE_READ_REG(hw,
5113                                         IXGBE_TXDCTL(txq->reg_idx));
5114                         } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5115                         if (!poll_ms)
5116                                 PMD_INIT_LOG(ERR, "Could not enable "
5117                                              "Tx Queue %d", tx_queue_id);
5118                 }
5119                 rte_wmb();
5120                 IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5121                 IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5122                 dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5123         } else
5124                 return -1;
5125
5126         return 0;
5127 }
5128
5129 /*
5130  * Stop Transmit Units for specified queue.
5131  */
5132 int __attribute__((cold))
5133 ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5134 {
5135         struct ixgbe_hw     *hw;
5136         struct ixgbe_tx_queue *txq;
5137         uint32_t txdctl;
5138         uint32_t txtdh, txtdt;
5139         int poll_ms;
5140
5141         PMD_INIT_FUNC_TRACE();
5142         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5143
5144         if (tx_queue_id >= dev->data->nb_tx_queues)
5145                 return -1;
5146
5147         txq = dev->data->tx_queues[tx_queue_id];
5148
5149         /* Wait until TX queue is empty */
5150         if (hw->mac.type == ixgbe_mac_82599EB) {
5151                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5152                 do {
5153                         rte_delay_us(RTE_IXGBE_WAIT_100_US);
5154                         txtdh = IXGBE_READ_REG(hw,
5155                                                IXGBE_TDH(txq->reg_idx));
5156                         txtdt = IXGBE_READ_REG(hw,
5157                                                IXGBE_TDT(txq->reg_idx));
5158                 } while (--poll_ms && (txtdh != txtdt));
5159                 if (!poll_ms)
5160                         PMD_INIT_LOG(ERR, "Tx Queue %d is not empty "
5161                                      "when stopping.", tx_queue_id);
5162         }
5163
5164         txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5165         txdctl &= ~IXGBE_TXDCTL_ENABLE;
5166         IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5167
5168         /* Wait until TX Enable bit clear */
5169         if (hw->mac.type == ixgbe_mac_82599EB) {
5170                 poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5171                 do {
5172                         rte_delay_ms(1);
5173                         txdctl = IXGBE_READ_REG(hw,
5174                                                 IXGBE_TXDCTL(txq->reg_idx));
5175                 } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5176                 if (!poll_ms)
5177                         PMD_INIT_LOG(ERR, "Could not disable "
5178                                      "Tx Queue %d", tx_queue_id);
5179         }
5180
5181         if (txq->ops != NULL) {
5182                 txq->ops->release_mbufs(txq);
5183                 txq->ops->reset(txq);
5184         }
5185         dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5186
5187         return 0;
5188 }
5189
5190 void
5191 ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5192         struct rte_eth_rxq_info *qinfo)
5193 {
5194         struct ixgbe_rx_queue *rxq;
5195
5196         rxq = dev->data->rx_queues[queue_id];
5197
5198         qinfo->mp = rxq->mb_pool;
5199         qinfo->scattered_rx = dev->data->scattered_rx;
5200         qinfo->nb_desc = rxq->nb_rx_desc;
5201
5202         qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5203         qinfo->conf.rx_drop_en = rxq->drop_en;
5204         qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5205 }
5206
5207 void
5208 ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5209         struct rte_eth_txq_info *qinfo)
5210 {
5211         struct ixgbe_tx_queue *txq;
5212
5213         txq = dev->data->tx_queues[queue_id];
5214
5215         qinfo->nb_desc = txq->nb_tx_desc;
5216
5217         qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5218         qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5219         qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5220
5221         qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5222         qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5223         qinfo->conf.txq_flags = txq->txq_flags;
5224         qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5225 }
5226
5227 /*
5228  * [VF] Initializes Receive Unit.
5229  */
5230 int __attribute__((cold))
5231 ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5232 {
5233         struct ixgbe_hw     *hw;
5234         struct ixgbe_rx_queue *rxq;
5235         uint64_t bus_addr;
5236         uint32_t srrctl, psrtype = 0;
5237         uint16_t buf_size;
5238         uint16_t i;
5239         int ret;
5240
5241         PMD_INIT_FUNC_TRACE();
5242         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5243
5244         if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5245                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5246                         "it should be power of 2");
5247                 return -1;
5248         }
5249
5250         if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5251                 PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
5252                         "it should be equal to or less than %d",
5253                         hw->mac.max_rx_queues);
5254                 return -1;
5255         }
5256
5257         /*
5258          * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
5259          * disables the VF receipt of packets if the PF MTU is > 1500.
5260          * This is done to deal with 82599 limitations that imposes
5261          * the PF and all VFs to share the same MTU.
5262          * Then, the PF driver enables again the VF receipt of packet when
5263          * the VF driver issues a IXGBE_VF_SET_LPE request.
5264          * In the meantime, the VF device cannot be used, even if the VF driver
5265          * and the Guest VM network stack are ready to accept packets with a
5266          * size up to the PF MTU.
5267          * As a work-around to this PF behaviour, force the call to
5268          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5269          * VF packets received can work in all cases.
5270          */
5271         ixgbevf_rlpml_set_vf(hw,
5272                 (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5273
5274         /* Setup RX queues */
5275         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5276                 rxq = dev->data->rx_queues[i];
5277
5278                 /* Allocate buffers for descriptor rings */
5279                 ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5280                 if (ret)
5281                         return ret;
5282
5283                 /* Setup the Base and Length of the Rx Descriptor Rings */
5284                 bus_addr = rxq->rx_ring_phys_addr;
5285
5286                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5287                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5288                 IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5289                                 (uint32_t)(bus_addr >> 32));
5290                 IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5291                                 rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5292                 IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5293                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5294
5295
5296                 /* Configure the SRRCTL register */
5297 #ifdef RTE_HEADER_SPLIT_ENABLE
5298                 /*
5299                  * Configure Header Split
5300                  */
5301                 if (dev->data->dev_conf.rxmode.header_split) {
5302                         srrctl = ((dev->data->dev_conf.rxmode.split_hdr_size <<
5303                                 IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
5304                                 IXGBE_SRRCTL_BSIZEHDR_MASK);
5305                         srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
5306                 } else
5307 #endif
5308                         srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5309
5310                 /* Set if packets are dropped when no descriptors available */
5311                 if (rxq->drop_en)
5312                         srrctl |= IXGBE_SRRCTL_DROP_EN;
5313
5314                 /*
5315                  * Configure the RX buffer size in the BSIZEPACKET field of
5316                  * the SRRCTL register of the queue.
5317                  * The value is in 1 KB resolution. Valid values can be from
5318                  * 1 KB to 16 KB.
5319                  */
5320                 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5321                         RTE_PKTMBUF_HEADROOM);
5322                 srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5323                            IXGBE_SRRCTL_BSIZEPKT_MASK);
5324
5325                 /*
5326                  * VF modification to write virtual function SRRCTL register
5327                  */
5328                 IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5329
5330                 buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5331                                        IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5332
5333                 if (dev->data->dev_conf.rxmode.enable_scatter ||
5334                     /* It adds dual VLAN length for supporting dual VLAN */
5335                     (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5336                                 2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5337                         if (!dev->data->scattered_rx)
5338                                 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5339                         dev->data->scattered_rx = 1;
5340                 }
5341         }
5342
5343 #ifdef RTE_HEADER_SPLIT_ENABLE
5344         if (dev->data->dev_conf.rxmode.header_split)
5345                 /* Must setup the PSRTYPE register */
5346                 psrtype = IXGBE_PSRTYPE_TCPHDR |
5347                         IXGBE_PSRTYPE_UDPHDR   |
5348                         IXGBE_PSRTYPE_IPV4HDR  |
5349                         IXGBE_PSRTYPE_IPV6HDR;
5350 #endif
5351
5352         /* Set RQPL for VF RSS according to max Rx queue */
5353         psrtype |= (dev->data->nb_rx_queues >> 1) <<
5354                 IXGBE_PSRTYPE_RQPL_SHIFT;
5355         IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5356
5357         ixgbe_set_rx_function(dev);
5358
5359         return 0;
5360 }
5361
5362 /*
5363  * [VF] Initializes Transmit Unit.
5364  */
5365 void __attribute__((cold))
5366 ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5367 {
5368         struct ixgbe_hw     *hw;
5369         struct ixgbe_tx_queue *txq;
5370         uint64_t bus_addr;
5371         uint32_t txctrl;
5372         uint16_t i;
5373
5374         PMD_INIT_FUNC_TRACE();
5375         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5376
5377         /* Setup the Base and Length of the Tx Descriptor Rings */
5378         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5379                 txq = dev->data->tx_queues[i];
5380                 bus_addr = txq->tx_ring_phys_addr;
5381                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5382                                 (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5383                 IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5384                                 (uint32_t)(bus_addr >> 32));
5385                 IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5386                                 txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5387                 /* Setup the HW Tx Head and TX Tail descriptor pointers */
5388                 IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5389                 IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5390
5391                 /*
5392                  * Disable Tx Head Writeback RO bit, since this hoses
5393                  * bookkeeping if things aren't delivered in order.
5394                  */
5395                 txctrl = IXGBE_READ_REG(hw,
5396                                 IXGBE_VFDCA_TXCTRL(i));
5397                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5398                 IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5399                                 txctrl);
5400         }
5401 }
5402
5403 /*
5404  * [VF] Start Transmit and Receive Units.
5405  */
5406 void __attribute__((cold))
5407 ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5408 {
5409         struct ixgbe_hw     *hw;
5410         struct ixgbe_tx_queue *txq;
5411         struct ixgbe_rx_queue *rxq;
5412         uint32_t txdctl;
5413         uint32_t rxdctl;
5414         uint16_t i;
5415         int poll_ms;
5416
5417         PMD_INIT_FUNC_TRACE();
5418         hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5419
5420         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5421                 txq = dev->data->tx_queues[i];
5422                 /* Setup Transmit Threshold Registers */
5423                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5424                 txdctl |= txq->pthresh & 0x7F;
5425                 txdctl |= ((txq->hthresh & 0x7F) << 8);
5426                 txdctl |= ((txq->wthresh & 0x7F) << 16);
5427                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5428         }
5429
5430         for (i = 0; i < dev->data->nb_tx_queues; i++) {
5431
5432                 txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5433                 txdctl |= IXGBE_TXDCTL_ENABLE;
5434                 IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5435
5436                 poll_ms = 10;
5437                 /* Wait until TX Enable ready */
5438                 do {
5439                         rte_delay_ms(1);
5440                         txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5441                 } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5442                 if (!poll_ms)
5443                         PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5444         }
5445         for (i = 0; i < dev->data->nb_rx_queues; i++) {
5446
5447                 rxq = dev->data->rx_queues[i];
5448
5449                 rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5450                 rxdctl |= IXGBE_RXDCTL_ENABLE;
5451                 IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5452
5453                 /* Wait until RX Enable ready */
5454                 poll_ms = 10;
5455                 do {
5456                         rte_delay_ms(1);
5457                         rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5458                 } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5459                 if (!poll_ms)
5460                         PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5461                 rte_wmb();
5462                 IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5463
5464         }
5465 }
5466
5467 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
5468 int __attribute__((weak))
5469 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5470 {
5471         return -1;
5472 }
5473
5474 uint16_t __attribute__((weak))
5475 ixgbe_recv_pkts_vec(
5476         void __rte_unused *rx_queue,
5477         struct rte_mbuf __rte_unused **rx_pkts,
5478         uint16_t __rte_unused nb_pkts)
5479 {
5480         return 0;
5481 }
5482
5483 uint16_t __attribute__((weak))
5484 ixgbe_recv_scattered_pkts_vec(
5485         void __rte_unused *rx_queue,
5486         struct rte_mbuf __rte_unused **rx_pkts,
5487         uint16_t __rte_unused nb_pkts)
5488 {
5489         return 0;
5490 }
5491
5492 int __attribute__((weak))
5493 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5494 {
5495         return -1;
5496 }